The following commit has been merged in the master branch: commit d2d047ed5dc62c80e07b87421201f1bdabb55f55 Merge: 0289a9dd2d977dc87941dc73dffe1ab2383b1c48 3cd52c1e32fe7dfee09815ced702db9ee9f84ec9 Author: Stephen Rothwell sfr@canb.auug.org.au Date: Thu Apr 8 13:20:07 2021 +1000
Merge remote-tracking branch 'net-next/master'
# Conflicts: # MAINTAINERS # drivers/net/ethernet/mellanox/mlx5/core/en_main.c # include/linux/bpf.h # include/linux/ethtool.h # include/linux/skmsg.h # net/core/skmsg.c # net/tipc/crypto.c
diff --combined Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml index 13c26f23a820,2a3be0f9a1a1..2f46e45dcd60 --- a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml +++ b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml @@@ -22,17 -22,25 +22,25 @@@ properties maxItems: 1
interrupts: - description: RX interrupt + minItems: 1 + maxItems: 2 + items: + - description: RX interrupt + - description: TX interrupt
interrupt-names: - const: rx + minItems: 1 + maxItems: 2 + items: + - const: rx + - const: tx
required: - reg - interrupts - interrupt-names
-additionalProperties: false +unevaluatedProperties: false
examples: - | @@@ -43,6 -51,7 +51,7 @@@ compatible = "brcm,bcm4908-enet"; reg = <0x80002000 0x1000>;
- interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "rx"; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "rx", "tx"; }; diff --combined Documentation/networking/ethtool-netlink.rst index dc03ff884541,fd84f4ed898a..ce4a69f8308f --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@@ -208,6 -208,8 +208,8 @@@ Userspace to kernel ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info + ``ETHTOOL_MSG_FEC_GET`` get FEC settings + ``ETHTOOL_MSG_FEC_SET`` set FEC settings ===================================== ================================
Kernel to userspace: @@@ -242,6 -244,8 +244,8 @@@ ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info + ``ETHTOOL_MSG_FEC_GET_REPLY`` FEC settings + ``ETHTOOL_MSG_FEC_NTF`` FEC settings ===================================== =================================
``GET`` requests are sent by userspace applications to retrieve device @@@ -976,9 -980,9 +980,9 @@@ constraints on coalescing parameters an
PAUSE_GET -============ +=========
-Gets channel counts like ``ETHTOOL_GPAUSE`` ioctl request. +Gets pause frame settings like ``ETHTOOL_GPAUSEPARAM`` ioctl request.
Request contents:
@@@ -1007,7 -1011,7 +1011,7 @@@ the statistics in the following structu Each member has a corresponding attribute defined.
PAUSE_SET -============ +=========
Sets pause parameters like ``ETHTOOL_GPAUSEPARAM`` ioctl request.
@@@ -1024,7 -1028,7 +1028,7 @@@ Request contents EEE_GET =======
-Gets channel counts like ``ETHTOOL_GEEE`` ioctl request. +Gets Energy Efficient Ethernet settings like ``ETHTOOL_GEEE`` ioctl request.
Request contents:
@@@ -1054,7 -1058,7 +1058,7 @@@ first 32 are provided by the ``ethtool_ EEE_SET =======
-Sets pause parameters like ``ETHTOOL_GEEEPARAM`` ioctl request. +Sets Energy Efficient Ethernet parameters like ``ETHTOOL_SEEE`` ioctl request.
Request contents:
@@@ -1280,6 -1284,60 +1284,60 @@@ Kernel response contents For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that the table contains static entries, hard-coded by the NIC.
+ FEC_GET + ======= + + Gets FEC configuration and state like ``ETHTOOL_GFECPARAM`` ioctl request. + + Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ===================================== ====== ========================== + + Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ``ETHTOOL_A_FEC_ACTIVE`` u32 index of active FEC mode + ===================================== ====== ========================== + + ``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently + active on the interface. This attribute may not be present if device does + not support FEC. + + ``ETHTOOL_A_FEC_MODES`` and ``ETHTOOL_A_FEC_AUTO`` are only meaningful when + autonegotiation is disabled. If ``ETHTOOL_A_FEC_AUTO`` is non-zero driver will + select the FEC mode automatically based on the parameters of the SFP module. + This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface. + ``ETHTOOL_A_FEC_MODES`` carry the current FEC configuration using link mode + bits (rather than old ``ETHTOOL_FEC_*`` bits). + + FEC_SET + ======= + + Sets FEC parameters like ``ETHTOOL_SFECPARAM`` ioctl request. + + Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ===================================== ====== ========================== + + ``FEC_SET`` is only meaningful when autonegotiation is disabled. Otherwise + FEC mode is selected as part of autonegotiation. + + ``ETHTOOL_A_FEC_MODES`` selects which FEC mode should be used. 
It's recommended + to set only one bit, if multiple bits are set driver may choose between them + in an implementation specific way. + + ``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP + module parameters. This does not mean autonegotiation. + Request translation ===================
@@@ -1373,9 -1431,9 +1431,9 @@@ are netlink only ``ETHTOOL_MSG_LINKMODES_SET`` ``ETHTOOL_PHY_GTUNABLE`` n/a ``ETHTOOL_PHY_STUNABLE`` n/a - ``ETHTOOL_GFECPARAM`` n/a - ``ETHTOOL_SFECPARAM`` n/a - n/a ''ETHTOOL_MSG_CABLE_TEST_ACT'' - n/a ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT'' + ``ETHTOOL_GFECPARAM`` ``ETHTOOL_MSG_FEC_GET`` + ``ETHTOOL_SFECPARAM`` ``ETHTOOL_MSG_FEC_SET`` + n/a ``ETHTOOL_MSG_CABLE_TEST_ACT`` + n/a ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` =================================== ===================================== diff --combined MAINTAINERS index f33788aa5a49,217c7470bfa9..5e38575ed162 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -1142,7 -1142,7 +1142,7 @@@ W: http://ez.analog.com/community/linux F: Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523 F: Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4350 F: Documentation/devicetree/bindings/iio/*/adi,* -F: Documentation/devicetree/bindings/iio/dac/ad5758.txt +F: Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml F: drivers/iio/*/ad* F: drivers/iio/adc/ltc249* F: drivers/iio/amplifiers/hmc425a.c @@@ -1530,6 -1530,7 +1530,7 @@@ F: Documentation/devicetree/bindings/dm F: Documentation/devicetree/bindings/i2c/i2c-owl.yaml F: Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml F: Documentation/devicetree/bindings/mmc/owl-mmc.yaml + F: Documentation/devicetree/bindings/net/actions,owl-emac.yaml F: Documentation/devicetree/bindings/pinctrl/actions,* F: Documentation/devicetree/bindings/power/actions,owl-sps.txt F: Documentation/devicetree/bindings/timer/actions,owl-timer.txt @@@ -1542,6 -1543,7 +1543,7 @@@ F: drivers/dma/owl-dma. F: drivers/i2c/busses/i2c-owl.c F: drivers/irqchip/irq-owl-sirq.c F: drivers/mmc/host/owl-mmc.c + F: drivers/net/ethernet/actions/ F: drivers/pinctrl/actions/* F: drivers/soc/actions/ F: include/dt-bindings/power/owl-* @@@ -1576,13 -1578,11 +1578,13 @@@ R: Jernej Skrabec <jernej.skrabec@siol. 
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git +L: linux-sunxi@lists.linux.dev F: arch/arm/mach-sunxi/ F: arch/arm64/boot/dts/allwinner/ F: drivers/clk/sunxi-ng/ F: drivers/pinctrl/sunxi/ F: drivers/soc/sunxi/ +N: allwinner N: sun[x456789]i N: sun50i
@@@ -2298,7 -2298,6 +2300,7 @@@ F: drivers/tty/serial/msm_serial. F: drivers/usb/dwc3/dwc3-qcom.c F: include/dt-bindings/*/qcom* F: include/linux/*/qcom* +F: include/linux/soc/qcom/
ARM/RADISYS ENP2611 MACHINE SUPPORT M: Lennert Buytenhek kernel@wantstofly.org @@@ -2378,7 -2377,7 +2380,7 @@@ F: sound/soc/rockchip N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -2492,7 -2491,7 +2494,7 @@@ N: sc27x N: sc2731
ARM/STI ARCHITECTURE -M: Patrice Chotard patrice.chotard@st.com +M: Patrice Chotard patrice.chotard@foss.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained W: http://www.stlinux.com @@@ -2525,7 -2524,7 +2527,7 @@@ F: include/linux/remoteproc/st_slim_rpr
ARM/STM32 ARCHITECTURE M: Maxime Coquelin mcoquelin.stm32@gmail.com -M: Alexandre Torgue alexandre.torgue@st.com +M: Alexandre Torgue alexandre.torgue@foss.st.com L: linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -3118,7 -3117,7 +3120,7 @@@ C: irc://irc.oftc.net/bcach F: drivers/md/bcache/
BDISP ST MEDIA DRIVER -M: Fabien Dessenne fabien.dessenne@st.com +M: Fabien Dessenne fabien.dessenne@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -3236,6 -3235,7 +3238,7 @@@ T: git git://git.kernel.org/pub/scm/lin T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git F: Documentation/bpf/ F: Documentation/networking/filter.rst + F: Documentation/userspace-api/ebpf/ F: arch/*/net/* F: include/linux/bpf* F: include/linux/filter.h @@@ -3250,6 -3250,7 +3253,7 @@@ F: net/core/filter. F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ + F: scripts/bpf_doc.py F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ @@@ -3678,7 -3679,7 +3682,7 @@@ M: bcm-kernel-feedback-list@broadcom.co L: linux-pm@vger.kernel.org S: Maintained T: git git://github.com/broadcom/stblinux.git -F: drivers/soc/bcm/bcm-pmb.c +F: drivers/soc/bcm/bcm63xx/bcm-pmb.c F: include/dt-bindings/soc/bcm-pmb.h
BROADCOM SPECIFIC AMBA DRIVER (BCMA) @@@ -4184,21 -4185,12 +4188,21 @@@ X: drivers/char/tpm CHECKPATCH M: Andy Whitcroft apw@canonical.com M: Joe Perches joe@perches.com +R: Dwaipayan Ray dwaipayanray1@gmail.com +R: Lukas Bulwahn lukas.bulwahn@gmail.com S: Maintained F: scripts/checkpatch.pl
+CHECKPATCH DOCUMENTATION +M: Dwaipayan Ray dwaipayanray1@gmail.com +M: Lukas Bulwahn lukas.bulwahn@gmail.com +R: Joe Perches joe@perches.com +S: Maintained +F: Documentation/dev-tools/checkpatch.rst + CHINESE DOCUMENTATION M: Harry Wei harryxiyou@gmail.com -M: Alex Shi alex.shi@linux.alibaba.com +M: Alex Shi alexs@kernel.org L: xiyoulinuxkernelgroup@googlegroups.com (subscribers-only) S: Maintained F: Documentation/translations/zh_CN/ @@@ -4446,7 -4438,7 +4450,7 @@@ F: include/linux/clk F: include/linux/of_clk.h X: drivers/clk/clkdev.c
-COMMON INTERNET FILE SYSTEM (CIFS) +COMMON INTERNET FILE SYSTEM CLIENT (CIFS) M: Steve French sfrench@samba.org L: linux-cifs@vger.kernel.org L: samba-technical@lists.samba.org (moderated for non-subscribers) @@@ -4456,16 -4448,6 +4460,16 @@@ T: git git://git.samba.org/sfrench/cifs F: Documentation/admin-guide/cifs/ F: fs/cifs/
+COMMON INTERNET FILE SYSTEM SERVER (CIFSD) +M: Namjae Jeon namjae.jeon@samsung.com +M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Steve French sfrench@samba.org +M: Hyunchul Lee hyc.lee@gmail.com +L: linux-cifs@vger.kernel.org +L: linux-cifsd-devel@lists.sourceforge.net +S: Maintained +F: fs/cifsd/ + COMPACTPCI HOTPLUG CORE M: Scott Murray scott@spiteful.org L: linux-pci@vger.kernel.org @@@ -4607,11 -4589,6 +4611,11 @@@ F: drivers/counter F: include/linux/counter.h F: include/linux/counter_enum.h
+CP2615 I2C DRIVER +M: Bence Csókás bence98@sch.bme.hu +S: Maintained +F: drivers/i2c/busses/i2c-cp2615.c + CPMAC ETHERNET DRIVER M: Florian Fainelli f.fainelli@gmail.com L: netdev@vger.kernel.org @@@ -5107,7 -5084,7 +5111,7 @@@ S: Maintaine F: drivers/platform/x86/dell/dell-wmi.c
DELTA ST MEDIA DRIVER -M: Hugues Fruchet hugues.fruchet@st.com +M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -5405,7 -5382,7 +5409,7 @@@ F: drivers/hwmon/dme1737. DMI/SMBIOS SUPPORT M: Jean Delvare jdelvare@suse.com S: Maintained -T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-dmi/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging.git dmi-for-next F: Documentation/ABI/testing/sysfs-firmware-dmi-tables F: drivers/firmware/dmi-id.c F: drivers/firmware/dmi_scan.c @@@ -5431,12 -5408,6 +5435,12 @@@ X: Documentation/power X: Documentation/spi/ X: Documentation/userspace-api/media/
+DOCUMENTATION REPORTING ISSUES +M: Thorsten Leemhuis linux@leemhuis.info +L: linux-doc@vger.kernel.org +S: Maintained +F: Documentation/admin-guide/reporting-issues.rst + DOCUMENTATION SCRIPTS M: Mauro Carvalho Chehab mchehab@kernel.org L: linux-doc@vger.kernel.org @@@ -5504,11 -5475,11 +5508,11 @@@ F: drivers/net/ethernet/freescale/dpaa2 F: drivers/net/ethernet/freescale/dpaa2/dpni*
DPAA2 ETHERNET SWITCH DRIVER - M: Ioana Radulescu ruxandra.radulescu@nxp.com M: Ioana Ciornei ioana.ciornei@nxp.com - L: linux-kernel@vger.kernel.org + L: netdev@vger.kernel.org S: Maintained - F: drivers/staging/fsl-dpaa2/ethsw + F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* + F: drivers/net/ethernet/freescale/dpaa2/dpsw*
DPT_I2O SCSI RAID DRIVER M: Adaptec OEM Raid Solutions aacraid@microsemi.com @@@ -5812,7 -5783,7 +5816,7 @@@ DRM DRIVER FOR ST-ERICSSON MCD M: Linus Walleij linus.walleij@linaro.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc -F: Documentation/devicetree/bindings/display/ste,mcde.txt +F: Documentation/devicetree/bindings/display/ste,mcde.yaml F: drivers/gpu/drm/mcde/
DRM DRIVER FOR TDFX VIDEO CARDS @@@ -6039,6 -6010,7 +6043,6 @@@ F: drivers/gpu/drm/rockchip
DRM DRIVERS FOR STI M: Benjamin Gaignard benjamin.gaignard@linaro.org -M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -6046,9 -6018,10 +6050,9 @@@ F: Documentation/devicetree/bindings/di F: drivers/gpu/drm/sti
DRM DRIVERS FOR STM -M: Yannick Fertre yannick.fertre@st.com -M: Philippe Cornu philippe.cornu@st.com +M: Yannick Fertre yannick.fertre@foss.st.com +M: Philippe Cornu philippe.cornu@foss.st.com M: Benjamin Gaignard benjamin.gaignard@linaro.org -M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -6085,7 -6058,7 +6089,7 @@@ DRM DRIVERS FOR V3 M: Eric Anholt eric@anholt.net S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc -F: Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt +F: Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml F: drivers/gpu/drm/v3d/ F: include/uapi/drm/v3d_drm.h
@@@ -7227,7 -7200,7 +7231,7 @@@ M: Li Yang <leoyang.li@nxp.com L: linuxppc-dev@lists.ozlabs.org L: linux-arm-kernel@lists.infradead.org S: Maintained -F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.txt +F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml F: Documentation/devicetree/bindings/soc/fsl/ F: drivers/soc/fsl/ F: include/linux/fsl/ @@@ -7359,13 -7332,6 +7363,13 @@@ F: fs/verity F: include/linux/fsverity.h F: include/uapi/linux/fsverity.h
+FT260 FTDI USB-HID TO I2C BRIDGE DRIVER +M: Michael Zaidman michael.zaidman@gmail.com +L: linux-i2c@vger.kernel.org +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/hid/hid-ft260.c + FUJITSU LAPTOP EXTRAS M: Jonathan Woithe jwoithe@just42.net L: platform-driver-x86@vger.kernel.org @@@ -7514,9 -7480,8 +7518,9 @@@ F: include/uapi/asm-generic GENERIC PHY FRAMEWORK M: Kishon Vijay Abraham I kishon@ti.com M: Vinod Koul vkoul@kernel.org -L: linux-kernel@vger.kernel.org +L: linux-phy@lists.infradead.org S: Supported +Q: https://patchwork.kernel.org/project/linux-phy/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git F: Documentation/devicetree/bindings/phy/ F: drivers/phy/ @@@ -8129,6 -8094,7 +8133,6 @@@ F: drivers/crypto/hisilicon/zip
HISILICON ROCE DRIVER M: Lijun Ou oulijun@huawei.com -M: Wei Hu(Xavier) huwei87@hisilicon.com M: Weihang Li liweihang@huawei.com L: linux-rdma@vger.kernel.org S: Maintained @@@ -8246,7 -8212,7 +8250,7 @@@ M: Lorenzo Bianconi <lorenzo.bianconi83 L: linux-iio@vger.kernel.org S: Maintained W: http://www.st.com/ -F: Documentation/devicetree/bindings/iio/humidity/hts221.txt +F: Documentation/devicetree/bindings/iio/humidity/st,hts221.yaml F: drivers/iio/humidity/hts221*
HUAWEI ETHERNET DRIVER @@@ -8268,7 -8234,7 +8272,7 @@@ F: include/linux/hugetlb. F: mm/hugetlb.c
HVA ST MEDIA DRIVER -M: Jean-Christophe Trotin jean-christophe.trotin@st.com +M: Jean-Christophe Trotin jean-christophe.trotin@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -8712,7 -8678,7 +8716,7 @@@ M: Peter Rosin <peda@axentia.se L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-dac-dpot-dac -F: Documentation/devicetree/bindings/iio/dac/dpot-dac.txt +F: Documentation/devicetree/bindings/iio/dac/dpot-dac.yaml F: drivers/iio/dac/dpot-dac.c
IIO ENVELOPE DETECTOR @@@ -8720,7 -8686,7 +8724,7 @@@ M: Peter Rosin <peda@axentia.se L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector -F: Documentation/devicetree/bindings/iio/adc/envelope-detector.txt +F: Documentation/devicetree/bindings/iio/adc/envelope-detector.yaml F: drivers/iio/adc/envelope-detector.c
IIO MULTIPLEXER @@@ -8730,12 -8696,6 +8734,12 @@@ S: Maintaine F: Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt F: drivers/iio/multiplexer/iio-mux.c
+IIO SCMI BASED DRIVER +M: Jyoti Bhayana jbhayana@google.com +L: linux-iio@vger.kernel.org +S: Maintained +F: drivers/iio/common/scmi_sensors/scmi_iio.c + IIO SUBSYSTEM AND DRIVERS M: Jonathan Cameron jic23@kernel.org R: Lars-Peter Clausen lars@metafoo.de @@@ -8755,9 -8715,9 +8759,9 @@@ IIO UNIT CONVERTE M: Peter Rosin peda@axentia.se L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt -F: Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt -F: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt +F: Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.yaml +F: Documentation/devicetree/bindings/iio/afe/current-sense-shunt.yaml +F: Documentation/devicetree/bindings/iio/afe/voltage-divider.yaml F: drivers/iio/afe/iio-rescale.c
IKANOS/ADI EAGLE ADSL USB DRIVER @@@ -9346,7 -9306,7 +9350,7 @@@ INVENSENSE MPU-3050 GYROSCOPE DRIVE M: Linus Walleij linus.walleij@linaro.org L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.txt +F: Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml F: drivers/iio/gyro/mpu3050*
IOC3 ETHERNET DRIVER @@@ -10074,6 -10034,7 +10078,6 @@@ F: scripts/leaking_addresses.p
LED SUBSYSTEM M: Pavel Machek pavel@ucw.cz -R: Dan Murphy dmurphy@ti.com L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git @@@ -10912,7 -10873,7 +10916,7 @@@ F: drivers/regulator/max77802-regulator F: include/dt-bindings/*/*max77802.h
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-pm@vger.kernel.org S: Supported @@@ -10921,7 -10882,7 +10925,7 @@@ F: drivers/power/supply/max77693_charge
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi cw00.choi@samsung.com -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-kernel@vger.kernel.org S: Supported @@@ -10949,7 -10910,7 +10953,7 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/radio/radio-maxiradio*
MCAN MMIO DEVICE DRIVER - M: Pankaj Sharma pankj.sharma@samsung.com + M: Chandrasekar Ramakrishnan rcsekar@samsung.com L: linux-can@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@@ -11147,12 -11108,12 +11151,12 @@@ F: drivers/media/platform/renesas-ceu. F: include/media/drv-intf/renesas-ceu.h
MEDIA DRIVERS FOR RENESAS - DRIF -M: Ramesh Shanmugasundaram rashanmu@gmail.com +M: Fabrizio Castro fabrizio.castro.jz@renesas.com L: linux-media@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Supported T: git git://linuxtv.org/media_tree.git -F: Documentation/devicetree/bindings/media/renesas,drif.txt +F: Documentation/devicetree/bindings/media/renesas,drif.yaml F: drivers/media/platform/rcar_drif.c
MEDIA DRIVERS FOR RENESAS - FCP @@@ -11209,7 -11170,7 +11213,7 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/dvb-frontends/stv6111*
MEDIA DRIVERS FOR STM32 - DCMI -M: Hugues Fruchet hugues.fruchet@st.com +M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported T: git git://linuxtv.org/media_tree.git @@@ -11321,7 -11282,7 +11325,7 @@@ F: drivers/media/platform/mtk-vpu MEDIATEK MMC/SD/SDIO DRIVER M: Chaotian Jing chaotian.jing@mediatek.com S: Maintained -F: Documentation/devicetree/bindings/mmc/mtk-sd.txt +F: Documentation/devicetree/bindings/mmc/mtk-sd.yaml F: drivers/mmc/host/mtk-sd.c
MEDIATEK MT76 WIRELESS LAN DRIVER @@@ -11572,7 -11533,7 +11576,7 @@@ F: include/linux/memblock. F: mm/memblock.c
MEMORY CONTROLLER DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git @@@ -11840,7 -11801,7 +11844,7 @@@ MICROCHIP SAMA5D2-COMPATIBLE ADC DRIVE M: Eugen Hristev eugen.hristev@microchip.com L: linux-iio@vger.kernel.org S: Supported -F: Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt +F: Documentation/devicetree/bindings/iio/adc/atmel,sama5d2-adc.yaml F: drivers/iio/adc/at91-sama5d2_adc.c F: include/dt-bindings/iio/adc/at91-sama5d2_adc.h
@@@ -11919,13 -11880,6 +11923,13 @@@ S: Maintaine T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git F: drivers/platform/surface/
+MICROSOFT SURFACE HID TRANSPORT DRIVER +M: Maximilian Luz luzmaximilian@gmail.com +L: linux-input@vger.kernel.org +L: platform-driver-x86@vger.kernel.org +S: Maintained +F: drivers/hid/surface-hid/ + MICROSOFT SURFACE HOT-PLUG DRIVER M: Maximilian Luz luzmaximilian@gmail.com L: platform-driver-x86@vger.kernel.org @@@ -11947,7 -11901,6 +11951,7 @@@ F: Documentation/driver-api/surface_agg F: drivers/platform/surface/aggregator/ F: drivers/platform/surface/surface_acpi_notify.c F: drivers/platform/surface/surface_aggregator_cdev.c +F: drivers/platform/surface/surface_aggregator_registry.c F: include/linux/surface_acpi_notify.h F: include/linux/surface_aggregator/ F: include/uapi/linux/surface_aggregator/ @@@ -12918,7 -12871,7 +12922,7 @@@ F: Documentation/devicetree/bindings/re F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml @@@ -12961,21 -12914,6 +12965,21 @@@ L: linux-nfc@lists.01.org (moderated fo S: Supported F: drivers/nfc/nxp-nci
+NXP i.MX 8QXP/8QM JPEG V4L2 DRIVER +M: Mirela Rabulea mirela.rabulea@nxp.com +R: NXP Linux Team linux-imx@nxp.com +L: linux-media@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/media/imx8-jpeg.yaml +F: drivers/media/platform/imx-jpeg + +NZXT-KRAKEN2 HARDWARE MONITORING DRIVER +M: Jonas Malaco jonas@protocubo.io +L: linux-hwmon@vger.kernel.org +S: Maintained +F: Documentation/hwmon/nzxt-kraken2.rst +F: drivers/hwmon/nzxt-kraken2.c + OBJAGG M: Jiri Pirko jiri@nvidia.com L: netdev@vger.kernel.org @@@ -13096,6 -13034,12 +13100,6 @@@ L: linux-omap@vger.kernel.or S: Maintained F: arch/arm/mach-omap2/omap_hwmod*data*
-OMAP HWMOD DATA FOR OMAP4-BASED DEVICES -M: Benoît Cousson bcousson@baylibre.com -L: linux-omap@vger.kernel.org -S: Maintained -F: arch/arm/mach-omap2/omap_hwmod_44xx_data.c - OMAP HWMOD SUPPORT M: Benoît Cousson bcousson@baylibre.com M: Paul Walmsley paul@pwsan.com @@@ -13244,7 -13188,7 +13248,7 @@@ M: Rui Miguel Silva <rmfrfs@gmail.com L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media_tree.git -F: Documentation/devicetree/bindings/media/i2c/ov2680.yaml +F: Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml F: drivers/media/i2c/ov2680.c
OMNIVISION OV2685 SENSOR DRIVER @@@ -13903,7 -13847,7 +13907,7 @@@ M: Lorenzo Pieralisi <lorenzo.pieralisi R: Rob Herring robh@kernel.org L: linux-pci@vger.kernel.org S: Supported -Q: http://patchwork.ozlabs.org/project/linux-pci/list/ +Q: http://patchwork.kernel.org/project/linux-pci/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/ F: drivers/pci/controller/
@@@ -13911,7 -13855,7 +13915,7 @@@ PCI SUBSYSTE M: Bjorn Helgaas bhelgaas@google.com L: linux-pci@vger.kernel.org S: Supported -Q: http://patchwork.ozlabs.org/project/linux-pci/list/ +Q: http://patchwork.kernel.org/project/linux-pci/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git F: Documentation/PCI/ F: Documentation/devicetree/bindings/pci/ @@@ -13959,6 -13903,7 +13963,6 @@@ PCIE DRIVER FOR HISILICO M: Zhou Wang wangzhou1@hisilicon.com L: linux-pci@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/pci/hisilicon-pcie.txt F: drivers/pci/controller/dwc/pcie-hisi.c
PCIE DRIVER FOR HISILICON KIRIN @@@ -14217,7 -14162,7 +14221,7 @@@ F: drivers/pinctrl/renesas
PIN CONTROLLER - SAMSUNG M: Tomasz Figa tomasz.figa@gmail.com -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Sylwester Nawrocki s.nawrocki@samsung.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org @@@ -14338,7 -14283,7 +14342,7 @@@ PNI RM3100 IIO DRIVE M: Song Qiang songqiang1304521@gmail.com L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/iio/magnetometer/pni,rm3100.txt +F: Documentation/devicetree/bindings/iio/magnetometer/pni,rm3100.yaml F: drivers/iio/magnetometer/rm3100*
PNP SUPPORT @@@ -14373,15 -14318,6 +14377,15 @@@ F: include/linux/pm_ F: include/linux/powercap.h F: kernel/configs/nopm.config
+DYNAMIC THERMAL POWER MANAGEMENT (DTPM) +M: Daniel Lezcano daniel.lezcano@kernel.org +L: linux-pm@vger.kernel.org +S: Supported +B: https://bugzilla.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +F: drivers/powercap/dtpm* +F: include/linux/dtpm.h + POWER STATE COORDINATION INTERFACE (PSCI) M: Mark Rutland mark.rutland@arm.com M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com @@@ -14459,7 -14395,7 +14463,7 @@@ F: kernel/sched/psi.
PRINTK M: Petr Mladek pmladek@suse.com -M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Sergey Senozhatsky senozhatsky@chromium.org R: Steven Rostedt rostedt@goodmis.org R: John Ogness john.ogness@linutronix.de S: Maintained @@@ -14851,7 -14787,7 +14855,7 @@@ M: Todor Tomov <todor.too@gmail.com L: linux-media@vger.kernel.org S: Maintained F: Documentation/admin-guide/media/qcom_camss.rst -F: Documentation/devicetree/bindings/media/qcom,camss.txt +F: Documentation/devicetree/bindings/media/*camss* F: drivers/media/platform/qcom/camss/
QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER @@@ -14920,14 -14856,6 +14924,14 @@@ L: linux-arm-msm@vger.kernel.or S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
+QUALCOMM IPC ROUTER (QRTR) DRIVER +M: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: include/trace/events/qrtr.h +F: include/uapi/linux/qrtr.h +F: net/qrtr/ + QUALCOMM IPCC MAILBOX DRIVER M: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org L: linux-arm-msm@vger.kernel.org @@@ -14963,7 -14891,6 +14967,7 @@@ F: include/linux/if_rmnet.
QUALCOMM TSENS THERMAL DRIVER M: Amit Kucheria amitk@kernel.org +M: Thara Gopinath thara.gopinath@linaro.org L: linux-pm@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained @@@ -15151,7 -15078,7 +15155,7 @@@ M: Laurent Pinchart <laurent.pinchart+r M: Niklas Söderlund niklas.soderlund+renesas@ragnatech.se L: linux-media@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/media/i2c/rdacm2x-gmsl.yaml +F: Documentation/devicetree/bindings/media/i2c/imi,rdacm2x-gmsl.yaml F: drivers/media/i2c/max9271.c F: drivers/media/i2c/max9271.h F: drivers/media/i2c/rdacm21.c @@@ -15328,7 -15255,7 +15332,7 @@@ RENESAS R-CAR GYROADC DRIVE M: Marek Vasut marek.vasut@gmail.com L: linux-iio@vger.kernel.org S: Supported -F: Documentation/devicetree/bindings/iio/adc/renesas,gyroadc.txt +F: Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml F: drivers/iio/adc/rcar-gyroadc.c
RENESAS R-CAR I2C DRIVERS @@@ -15614,8 -15541,8 +15618,8 @@@ T: git git://git.kernel.org/pub/scm/lin F: drivers/net/wireless/realtek/rtl8xxxu/
RTRS TRANSPORT DRIVERS -M: Danil Kipnis danil.kipnis@cloud.ionos.com -M: Jack Wang jinpu.wang@cloud.ionos.com +M: Md. Haris Iqbal haris.iqbal@ionos.com +M: Jack Wang jinpu.wang@ionos.com L: linux-rdma@vger.kernel.org S: Maintained F: drivers/infiniband/ulp/rtrs/ @@@ -15707,8 -15634,8 +15711,8 @@@ F: Documentation/s390/pci.rs
S390 VFIO AP DRIVER M: Tony Krowiak akrowiak@linux.ibm.com -M: Pierre Morel pmorel@linux.ibm.com M: Halil Pasic pasic@linux.ibm.com +M: Jason Herne jjherne@linux.ibm.com L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@@ -15720,7 -15647,6 +15724,7 @@@ F: drivers/s390/crypto/vfio_ap_private. S390 VFIO-CCW DRIVER M: Cornelia Huck cohuck@redhat.com M: Eric Farman farman@linux.ibm.com +M: Matthew Rosato mjrosato@linux.ibm.com R: Halil Pasic pasic@linux.ibm.com L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org @@@ -15731,7 -15657,6 +15735,7 @@@ F: include/uapi/linux/vfio_ccw.
S390 VFIO-PCI DRIVER M: Matthew Rosato mjrosato@linux.ibm.com +M: Eric Farman farman@linux.ibm.com L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org S: Supported @@@ -15792,7 -15717,7 +15796,7 @@@ F: Documentation/admin-guide/LSM/SafeSe F: security/safesetid/
SAMSUNG AUDIO (ASoC) DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Sylwester Nawrocki s.nawrocki@samsung.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported @@@ -15800,7 -15725,7 +15804,7 @@@ F: Documentation/devicetree/bindings/so F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -15835,7 -15760,7 +15839,7 @@@ S: Maintaine F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -15860,7 -15785,7 +15864,7 @@@ F: drivers/media/platform/s3c-camif F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Krzysztof Opasiak k.opasiak@samsung.com L: linux-nfc@lists.01.org (moderated for non-subscribers) S: Maintained @@@ -15880,7 -15805,7 +15884,7 @@@ S: Supporte F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Vladimir Zapolskiy vz@mleia.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -15912,7 -15837,7 +15916,7 @@@ F: include/linux/clk/samsung. F: include/linux/platform_data/clk-s3c2410.h
SAMSUNG SPI DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Andi Shyti andi@etezian.org L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -16675,13 -16600,6 +16679,13 @@@ F: drivers/firmware/arm_sdei. F: include/linux/arm_sdei.h F: include/uapi/linux/arm_sdei.h
+SOFTWARE NODES +R: Andy Shevchenko andriy.shevchenko@linux.intel.com +R: Heikki Krogerus heikki.krogerus@linux.intel.com +L: linux-acpi@vger.kernel.org +S: Maintained +F: drivers/base/swnode.c + SOFTWARE RAID (Multiple Disks) SUPPORT M: Song Liu song@kernel.org L: linux-raid@vger.kernel.org @@@ -17013,7 -16931,7 +17017,7 @@@ M: Lorenzo Bianconi <lorenzo.bianconi83 L: linux-iio@vger.kernel.org S: Maintained W: http://www.st.com/ -F: Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt +F: Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml F: drivers/iio/imu/st_lsm6dsx/
ST MIPID02 CSI-2 TO PARALLEL BRIDGE DRIVER @@@ -17025,24 -16943,16 +17029,24 @@@ F: Documentation/devicetree/bindings/me F: drivers/media/i2c/st-mipid02.c
ST STM32 I2C/SMBUS DRIVER -M: Pierre-Yves MORDRET pierre-yves.mordret@st.com +M: Pierre-Yves MORDRET pierre-yves.mordret@foss.st.com +M: Alain Volmat alain.volmat@foss.st.com L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-stm32*
+ST STPDDC60 DRIVER +M: Daniel Nilsson daniel.nilsson@flex.com +L: linux-hwmon@vger.kernel.org +S: Maintained +F: Documentation/hwmon/stpddc60.rst +F: drivers/hwmon/pmbus/stpddc60.c + ST VL53L0X ToF RANGER(I2C) IIO DRIVER M: Song Qiang songqiang1304521@gmail.com L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/iio/proximity/vl53l0x.txt +F: Documentation/devicetree/bindings/iio/proximity/st,vl53l0x.yaml F: drivers/iio/proximity/vl53l0x-i2c.c
STABLE BRANCH @@@ -17158,7 -17068,7 +17162,7 @@@ F: kernel/jump_label. F: kernel/static_call.c
STI AUDIO (ASoC) DRIVERS -M: Arnaud Pouliquen arnaud.pouliquen@st.com +M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt @@@ -17178,15 -17088,15 +17182,15 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/usb/stk1160/
STM32 AUDIO (ASoC) DRIVERS -M: Olivier Moysan olivier.moysan@st.com -M: Arnaud Pouliquen arnaud.pouliquen@st.com +M: Olivier Moysan olivier.moysan@foss.st.com +M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml F: sound/soc/stm/
STM32 TIMER/LPTIMER DRIVERS -M: Fabrice Gasnier fabrice.gasnier@st.com +M: Fabrice Gasnier fabrice.gasnier@foss.st.com S: Maintained F: Documentation/ABI/testing/*timer-stm32 F: Documentation/devicetree/bindings/*/*stm32-*timer* @@@ -17196,7 -17106,7 +17200,7 @@@ F: include/linux/*/stm32-*tim
STMMAC ETHERNET DRIVER M: Giuseppe Cavallaro peppe.cavallaro@st.com -M: Alexandre Torgue alexandre.torgue@st.com +M: Alexandre Torgue alexandre.torgue@foss.st.com M: Jose Abreu joabreu@synopsys.com L: netdev@vger.kernel.org S: Supported @@@ -17362,7 -17272,7 +17366,7 @@@ F: drivers/spi/spi-dw SYNOPSYS DESIGNWARE AXI DMAC DRIVER M: Eugeniy Paltsev Eugeniy.Paltsev@synopsys.com S: Maintained -F: Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt +F: Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml F: drivers/dma/dw-axi-dmac/
SYNOPSYS DESIGNWARE DMAC DRIVER @@@ -17778,7 -17688,7 +17782,7 @@@ TEXAS INSTRUMENTS' DAC7612 DAC DRIVE M: Ricardo Ribalda ribalda@kernel.org L: linux-iio@vger.kernel.org S: Supported -F: Documentation/devicetree/bindings/iio/dac/ti,dac7612.txt +F: Documentation/devicetree/bindings/iio/dac/ti,dac7612.yaml F: drivers/iio/dac/ti-dac7612.c
TEXAS INSTRUMENTS DMA DRIVERS @@@ -17862,7 -17772,7 +17866,7 @@@ THERMAL/CPU_COOLIN M: Amit Daniel Kachhap amit.kachhap@gmail.com M: Daniel Lezcano daniel.lezcano@linaro.org M: Viresh Kumar viresh.kumar@linaro.org -M: Javi Merino javi.merino@kernel.org +R: Lukasz Luba lukasz.luba@arm.com L: linux-pm@vger.kernel.org S: Supported F: Documentation/driver-api/thermal/cpu-cooling-api.rst @@@ -17938,6 -17848,7 +17942,6 @@@ S: Maintaine F: drivers/thermal/ti-soc-thermal/
TI BQ27XXX POWER SUPPLY DRIVER -R: Dan Murphy dmurphy@ti.com F: drivers/power/supply/bq27xxx_battery.c F: drivers/power/supply/bq27xxx_battery_i2c.c F: include/linux/power/bq27xxx_battery.h @@@ -18071,12 -17982,6 +18075,6 @@@ L: alsa-devel@alsa-project.org (moderat S: Odd Fixes F: sound/soc/codecs/tas571x*
- TI TCAN4X5X DEVICE DRIVER - L: linux-can@vger.kernel.org - S: Maintained - F: Documentation/devicetree/bindings/net/can/tcan4x5x.txt - F: drivers/net/can/m_can/tcan4x5x* - TI TRF7970A NFC DRIVER M: Mark Greer mgreer@animalcreek.com L: linux-wireless@vger.kernel.org @@@ -19314,7 -19219,7 +19312,7 @@@ F: drivers/net/vrf. VSPRINTF M: Petr Mladek pmladek@suse.com M: Steven Rostedt rostedt@goodmis.org -M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Sergey Senozhatsky senozhatsky@chromium.org R: Andy Shevchenko andriy.shevchenko@linux.intel.com R: Rasmus Villemoes linux@rasmusvillemoes.dk S: Maintained @@@ -19965,7 -19870,7 +19963,7 @@@ F: drivers/staging/media/zoran ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org -R: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com +R: Sergey Senozhatsky senozhatsky@chromium.org L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/admin-guide/blockdev/zram.rst @@@ -19979,7 -19884,7 +19977,7 @@@ F: drivers/tty/serial/zs. ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org -R: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com +R: Sergey Senozhatsky senozhatsky@chromium.org L: linux-mm@kvack.org S: Maintained F: Documentation/vm/zsmalloc.rst diff --combined drivers/net/can/spi/mcp251x.c index a57da43680d8,80ab1593ca31..492f1bcb0516 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@@ -276,7 -276,7 +276,7 @@@ static void mcp251x_clean(struct net_de net->stats.tx_errors++; dev_kfree_skb(priv->tx_skb); if (priv->tx_len) - can_free_echo_skb(priv->net, 0); + can_free_echo_skb(priv->net, 0, NULL); priv->tx_skb = NULL; priv->tx_len = 0; } @@@ -314,18 -314,6 +314,18 @@@ static int mcp251x_spi_trans(struct spi return ret; }
+static int mcp251x_spi_write(struct spi_device *spi, int len) +{ + struct mcp251x_priv *priv = spi_get_drvdata(spi); + int ret; + + ret = spi_write(spi, priv->spi_tx_buf, len); + if (ret) + dev_err(&spi->dev, "spi write failed: ret = %d\n", ret); + + return ret; +} + static u8 mcp251x_read_reg(struct spi_device *spi, u8 reg) { struct mcp251x_priv *priv = spi_get_drvdata(spi); @@@ -373,7 -361,7 +373,7 @@@ static void mcp251x_write_reg(struct sp priv->spi_tx_buf[1] = reg; priv->spi_tx_buf[2] = val;
- mcp251x_spi_trans(spi, 3); + mcp251x_spi_write(spi, 3); }
static void mcp251x_write_2regs(struct spi_device *spi, u8 reg, u8 v1, u8 v2) @@@ -385,7 -373,7 +385,7 @@@ priv->spi_tx_buf[2] = v1; priv->spi_tx_buf[3] = v2;
- mcp251x_spi_trans(spi, 4); + mcp251x_spi_write(spi, 4); }
static void mcp251x_write_bits(struct spi_device *spi, u8 reg, @@@ -398,7 -386,7 +398,7 @@@ priv->spi_tx_buf[2] = mask; priv->spi_tx_buf[3] = val;
- mcp251x_spi_trans(spi, 4); + mcp251x_spi_write(spi, 4); }
static u8 mcp251x_read_stat(struct spi_device *spi) @@@ -630,7 -618,7 +630,7 @@@ static void mcp251x_hw_tx_frame(struct buf[i]); } else { memcpy(priv->spi_tx_buf, buf, TXBDAT_OFF + len); - mcp251x_spi_trans(spi, TXBDAT_OFF + len); + mcp251x_spi_write(spi, TXBDAT_OFF + len); } }
@@@ -662,7 -650,7 +662,7 @@@ static void mcp251x_hw_tx(struct spi_de
/* use INSTRUCTION_RTS, to avoid "repeated frame problem" */ priv->spi_tx_buf[0] = INSTRUCTION_RTS(1 << tx_buf_idx); - mcp251x_spi_trans(priv->spi, 1); + mcp251x_spi_write(priv->spi, 1); }
static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, @@@ -900,7 -888,7 +900,7 @@@ static int mcp251x_hw_reset(struct spi_ mdelay(MCP251X_OST_DELAY_MS);
priv->spi_tx_buf[0] = INSTRUCTION_RESET; - ret = mcp251x_spi_trans(spi, 1); + ret = mcp251x_spi_write(spi, 1); if (ret) return ret;
diff --combined drivers/net/can/usb/peak_usb/pcan_usb_core.c index 28e916a04047,ad006edf474d..e69b005be068 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@@ -14,6 -14,7 +14,7 @@@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/usb.h> + #include <linux/ethtool.h>
#include <linux/can.h> #include <linux/can/dev.h> @@@ -371,7 -372,7 +372,7 @@@ static netdev_tx_t peak_usb_ndo_start_x
err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL);
usb_unanchor_urb(urb);
@@@ -820,6 -821,9 +821,9 @@@ static int peak_usb_create_dev(const st
netdev->flags |= IFF_ECHO; /* we support local echo */
+ /* add ethtool support */ + netdev->ethtool_ops = peak_usb_adapter->ethtool_ops; + init_usb_anchor(&dev->rx_submitted);
init_usb_anchor(&dev->tx_submitted); @@@ -857,7 -861,7 +861,7 @@@ if (dev->adapter->dev_set_bus) { err = dev->adapter->dev_set_bus(dev, 0); if (err) - goto lbl_unregister_candev; + goto adap_dev_free; }
/* get device number early */ @@@ -869,10 -873,6 +873,10 @@@
return 0;
+adap_dev_free: + if (dev->adapter->dev_free) + dev->adapter->dev_free(dev); + lbl_unregister_candev: unregister_candev(netdev);
diff --combined drivers/net/dsa/lantiq_gswip.c index 809dfa3be6bb,26d0e3bb5dea..20774ed4bd7f --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@@ -1,6 -1,6 +1,6 @@@ // SPDX-License-Identifier: GPL-2.0 /* - * Lantiq / Intel GSWIP switch driver for VRX200 SoCs + * Lantiq / Intel GSWIP switch driver for VRX200, xRX300 and xRX330 SoCs * * Copyright (C) 2010 Lantiq Deutschland * Copyright (C) 2012 John Crispin john@phrozen.org @@@ -100,6 -100,7 +100,7 @@@ #define GSWIP_MII_CFG_MODE_RMIIP 0x2 #define GSWIP_MII_CFG_MODE_RMIIM 0x3 #define GSWIP_MII_CFG_MODE_RGMII 0x4 + #define GSWIP_MII_CFG_MODE_GMII 0x9 #define GSWIP_MII_CFG_MODE_MASK 0xf #define GSWIP_MII_CFG_RATE_M2P5 0x00 #define GSWIP_MII_CFG_RATE_M25 0x10 @@@ -220,6 -221,7 +221,7 @@@ struct gswip_hw_info { int max_ports; int cpu_port; + const struct dsa_switch_ops *ops; };
struct xway_gphy_match_data { @@@ -799,15 -801,10 +801,15 @@@ static int gswip_setup(struct dsa_switc /* Configure the MDIO Clock 2.5 MHz */ gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
- /* Disable the xMII link */ - for (i = 0; i < priv->hw_info->max_ports; i++) + for (i = 0; i < priv->hw_info->max_ports; i++) { + /* Disable the xMII link */ gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i);
+ /* Automatically select the xMII interface clock */ + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, + GSWIP_MII_CFG_RATE_AUTO, i); + } + /* enable special tag insertion on cpu port */ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, GSWIP_FDMA_PCTRLp(cpu_port)); @@@ -1389,12 -1386,42 +1391,42 @@@ static int gswip_port_fdb_dump(struct d return 0; }
- static void gswip_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) + static void gswip_phylink_set_capab(unsigned long *supported, + struct phylink_link_state *state) { __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+ /* Allow all the expected bits */ + phylink_set(mask, Autoneg); + phylink_set_port_modes(mask); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + + /* With the exclusion of MII, Reverse MII and Reduced MII, we + * support Gigabit, including Half duplex + */ + if (state->interface != PHY_INTERFACE_MODE_MII && + state->interface != PHY_INTERFACE_MODE_REVMII && + state->interface != PHY_INTERFACE_MODE_RMII) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseT_Half); + } + + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + } + + static void gswip_xrx200_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) + { switch (port) { case 0: case 1: @@@ -1421,38 -1448,54 +1453,54 @@@ return; }
- /* Allow all the expected bits */ - phylink_set(mask, Autoneg); - phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); + gswip_phylink_set_capab(supported, state);
- /* With the exclusion of MII, Reverse MII and Reduced MII, we - * support Gigabit, including Half duplex - */ - if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII && - state->interface != PHY_INTERFACE_MODE_RMII) { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseT_Half); + return; + + unsupported: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported interface '%s' for port %d\n", + phy_modes(state->interface), port); + } + + static void gswip_xrx300_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) + { + switch (port) { + case 0: + if (!phy_interface_mode_is_rgmii(state->interface) && + state->interface != PHY_INTERFACE_MODE_GMII && + state->interface != PHY_INTERFACE_MODE_RMII) + goto unsupported; + break; + case 1: + case 2: + case 3: + case 4: + if (state->interface != PHY_INTERFACE_MODE_INTERNAL) + goto unsupported; + break; + case 5: + if (!phy_interface_mode_is_rgmii(state->interface) && + state->interface != PHY_INTERFACE_MODE_INTERNAL && + state->interface != PHY_INTERFACE_MODE_RMII) + goto unsupported; + break; + default: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported port: %i\n", port); + return; }
- phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); + gswip_phylink_set_capab(supported, state);
- bitmap_and(supported, supported, mask, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_and(state->advertising, state->advertising, mask, - __ETHTOOL_LINK_MODE_MASK_NBITS); return;
unsupported: bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); dev_err(ds->dev, "Unsupported interface '%s' for port %d\n", phy_modes(state->interface), port); - return; }
static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, @@@ -1481,6 -1524,9 +1529,9 @@@ case PHY_INTERFACE_MODE_RGMII_TXID: miicfg |= GSWIP_MII_CFG_MODE_RGMII; break; + case PHY_INTERFACE_MODE_GMII: + miicfg |= GSWIP_MII_CFG_MODE_GMII; + break; default: dev_err(ds->dev, "Unsupported interface: %d\n", state->interface); @@@ -1593,7 -1639,7 +1644,7 @@@ static int gswip_get_sset_count(struct return ARRAY_SIZE(gswip_rmon_cnt); }
- static const struct dsa_switch_ops gswip_switch_ops = { + static const struct dsa_switch_ops gswip_xrx200_switch_ops = { .get_tag_protocol = gswip_get_tag_protocol, .setup = gswip_setup, .port_enable = gswip_port_enable, @@@ -1608,7 -1654,31 +1659,31 @@@ .port_fdb_add = gswip_port_fdb_add, .port_fdb_del = gswip_port_fdb_del, .port_fdb_dump = gswip_port_fdb_dump, - .phylink_validate = gswip_phylink_validate, + .phylink_validate = gswip_xrx200_phylink_validate, + .phylink_mac_config = gswip_phylink_mac_config, + .phylink_mac_link_down = gswip_phylink_mac_link_down, + .phylink_mac_link_up = gswip_phylink_mac_link_up, + .get_strings = gswip_get_strings, + .get_ethtool_stats = gswip_get_ethtool_stats, + .get_sset_count = gswip_get_sset_count, + }; + + static const struct dsa_switch_ops gswip_xrx300_switch_ops = { + .get_tag_protocol = gswip_get_tag_protocol, + .setup = gswip_setup, + .port_enable = gswip_port_enable, + .port_disable = gswip_port_disable, + .port_bridge_join = gswip_port_bridge_join, + .port_bridge_leave = gswip_port_bridge_leave, + .port_fast_age = gswip_port_fast_age, + .port_vlan_filtering = gswip_port_vlan_filtering, + .port_vlan_add = gswip_port_vlan_add, + .port_vlan_del = gswip_port_vlan_del, + .port_stp_state_set = gswip_port_stp_state_set, + .port_fdb_add = gswip_port_fdb_add, + .port_fdb_del = gswip_port_fdb_del, + .port_fdb_dump = gswip_port_fdb_dump, + .phylink_validate = gswip_xrx300_phylink_validate, .phylink_mac_config = gswip_phylink_mac_config, .phylink_mac_link_down = gswip_phylink_mac_link_down, .phylink_mac_link_up = gswip_phylink_mac_link_up, @@@ -1837,7 -1907,7 +1912,7 @@@ remove_gphy static int gswip_probe(struct platform_device *pdev) { struct gswip_priv *priv; - struct device_node *mdio_np, *gphy_fw_np; + struct device_node *np, *mdio_np, *gphy_fw_np; struct device *dev = &pdev->dev; int err; int i; @@@ -1870,10 -1940,28 +1945,28 @@@ priv->ds->dev = dev; priv->ds->num_ports = priv->hw_info->max_ports; priv->ds->priv = priv; - 
priv->ds->ops = &gswip_switch_ops; + priv->ds->ops = priv->hw_info->ops; priv->dev = dev; version = gswip_switch_r(priv, GSWIP_VERSION);
+ np = dev->of_node; + switch (version) { + case GSWIP_VERSION_2_0: + case GSWIP_VERSION_2_1: + if (!of_device_is_compatible(np, "lantiq,xrx200-gswip")) + return -EINVAL; + break; + case GSWIP_VERSION_2_2: + case GSWIP_VERSION_2_2_ETC: + if (!of_device_is_compatible(np, "lantiq,xrx300-gswip") && + !of_device_is_compatible(np, "lantiq,xrx330-gswip")) + return -EINVAL; + break; + default: + dev_err(dev, "unknown GSWIP version: 0x%x", version); + return -ENOENT; + } + /* bring up the mdio bus */ gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw"); if (gphy_fw_np) { @@@ -1951,10 -2039,19 +2044,19 @@@ static int gswip_remove(struct platform static const struct gswip_hw_info gswip_xrx200 = { .max_ports = 7, .cpu_port = 6, + .ops = &gswip_xrx200_switch_ops, + }; + + static const struct gswip_hw_info gswip_xrx300 = { + .max_ports = 7, + .cpu_port = 6, + .ops = &gswip_xrx300_switch_ops, };
static const struct of_device_id gswip_of_match[] = { { .compatible = "lantiq,xrx200-gswip", .data = &gswip_xrx200 }, + { .compatible = "lantiq,xrx300-gswip", .data = &gswip_xrx300 }, + { .compatible = "lantiq,xrx330-gswip", .data = &gswip_xrx300 }, {}, }; MODULE_DEVICE_TABLE(of, gswip_of_match); diff --combined drivers/net/ethernet/broadcom/bcm4908_enet.c index 65981931a798,cbfed1d1477b..b7afac5c7ca7 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@@ -9,6 -9,7 +9,7 @@@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/of.h> + #include <linux/of_net.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/string.h> @@@ -53,6 -54,7 +54,7 @@@ struct bcm4908_enet_dma_ring int length; u16 cfg_block; u16 st_ram_block; + struct napi_struct napi;
union { void *cpu_addr; @@@ -66,8 -68,8 +68,8 @@@ struct bcm4908_enet { struct device *dev; struct net_device *netdev; - struct napi_struct napi; void __iomem *base; + int irq_tx;
struct bcm4908_enet_dma_ring tx_ring; struct bcm4908_enet_dma_ring rx_ring; @@@ -122,24 -124,31 +124,31 @@@ static void enet_umac_set(struct bcm490 * Helpers */
- static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet) + static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); + enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD); }
- static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet) + /*** + * DMA ring ops + */ + + static void bcm4908_enet_dma_ring_intrs_on(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); }
- static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet) + static void bcm4908_enet_dma_ring_intrs_off(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0); }
- static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu) + static void bcm4908_enet_dma_ring_intrs_ack(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); }
/*** @@@ -172,7 -181,6 +181,7 @@@ static int bcm4908_dma_alloc_buf_descs(
err_free_buf_descs: dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr); + ring->cpu_addr = NULL; return -ENOMEM; }
@@@ -414,11 -422,14 +423,14 @@@ static void bcm4908_enet_gmac_init(stru static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id) { struct bcm4908_enet *enet = dev_id; + struct bcm4908_enet_dma_ring *ring;
- bcm4908_enet_intrs_off(enet); - bcm4908_enet_intrs_ack(enet); + ring = (irq == enet->irq_tx) ? &enet->tx_ring : &enet->rx_ring;
- napi_schedule(&enet->napi); + bcm4908_enet_dma_ring_intrs_off(enet, ring); + bcm4908_enet_dma_ring_intrs_ack(enet, ring); + + napi_schedule(&ring->napi);
return IRQ_HANDLED; } @@@ -426,6 -437,8 +438,8 @@@ static int bcm4908_enet_open(struct net_device *netdev) { struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; struct device *dev = enet->dev; int err;
@@@ -435,6 -448,17 +449,17 @@@ return err; }
+ if (enet->irq_tx > 0) { + err = request_irq(enet->irq_tx, bcm4908_enet_irq_handler, 0, + "tx", enet); + if (err) { + dev_err(dev, "Failed to request IRQ %d: %d\n", + enet->irq_tx, err); + free_irq(netdev->irq, enet); + return err; + } + } + bcm4908_enet_gmac_init(enet); bcm4908_enet_dma_reset(enet); bcm4908_enet_dma_init(enet); @@@ -443,14 -467,19 +468,19 @@@
enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN); enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0); - bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
- napi_enable(&enet->napi); + if (enet->irq_tx > 0) { + napi_enable(&tx_ring->napi); + bcm4908_enet_dma_ring_intrs_ack(enet, tx_ring); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); + } + + bcm4908_enet_dma_rx_ring_enable(enet, rx_ring); + napi_enable(&rx_ring->napi); netif_carrier_on(netdev); netif_start_queue(netdev); - - bcm4908_enet_intrs_ack(enet); - bcm4908_enet_intrs_on(enet); + bcm4908_enet_dma_ring_intrs_ack(enet, rx_ring); + bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
return 0; } @@@ -458,16 -487,20 +488,20 @@@ static int bcm4908_enet_stop(struct net_device *netdev) { struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
netif_stop_queue(netdev); netif_carrier_off(netdev); - napi_disable(&enet->napi); + napi_disable(&rx_ring->napi); + napi_disable(&tx_ring->napi);
bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring); bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring);
bcm4908_enet_dma_uninit(enet);
+ free_irq(enet->irq_tx, enet); free_irq(enet->netdev->irq, enet);
return 0; @@@ -484,25 -517,19 +518,19 @@@ static int bcm4908_enet_start_xmit(stru u32 tmp;
/* Free transmitted skbs */ - while (ring->read_idx != ring->write_idx) { - buf_desc = &ring->buf_desc[ring->read_idx]; - if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) - break; - slot = &ring->slots[ring->read_idx]; - - dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); - dev_kfree_skb(slot->skb); - if (++ring->read_idx == ring->length) - ring->read_idx = 0; - } + if (enet->irq_tx < 0 && + !(le32_to_cpu(ring->buf_desc[ring->read_idx].ctl) & DMA_CTL_STATUS_OWN)) + napi_schedule(&enet->tx_ring.napi);
/* Don't use the last empty buf descriptor */ if (ring->read_idx <= ring->write_idx) free_buf_descs = ring->read_idx - ring->write_idx + ring->length; else free_buf_descs = ring->read_idx - ring->write_idx; - if (free_buf_descs < 2) + if (free_buf_descs < 2) { + netif_stop_queue(netdev); return NETDEV_TX_BUSY; + }
/* Hardware removes OWN bit after sending data */ buf_desc = &ring->buf_desc[ring->write_idx]; @@@ -539,9 -566,10 +567,10 @@@ return NETDEV_TX_OK; }
- static int bcm4908_enet_poll(struct napi_struct *napi, int weight) + static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight) { - struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi); + struct bcm4908_enet_dma_ring *rx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi); + struct bcm4908_enet *enet = container_of(rx_ring, struct bcm4908_enet, rx_ring); struct device *dev = enet->dev; int handled = 0;
@@@ -590,7 -618,7 +619,7 @@@
if (handled < weight) { napi_complete_done(napi, handled); - bcm4908_enet_intrs_on(enet); + bcm4908_enet_dma_ring_intrs_on(enet, rx_ring); }
/* Hardware could disable ring if it run out of descriptors */ @@@ -599,6 -627,42 +628,42 @@@ return handled; }
+ static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight) + { + struct bcm4908_enet_dma_ring *tx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi); + struct bcm4908_enet *enet = container_of(tx_ring, struct bcm4908_enet, tx_ring); + struct bcm4908_enet_dma_ring_bd *buf_desc; + struct bcm4908_enet_dma_ring_slot *slot; + struct device *dev = enet->dev; + unsigned int bytes = 0; + int handled = 0; + + while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) { + buf_desc = &tx_ring->buf_desc[tx_ring->read_idx]; + if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) + break; + slot = &tx_ring->slots[tx_ring->read_idx]; + + dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); + dev_kfree_skb(slot->skb); + bytes += slot->len; + if (++tx_ring->read_idx == tx_ring->length) + tx_ring->read_idx = 0; + + handled++; + } + + if (handled < weight) { + napi_complete_done(napi, handled); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); + } + + if (netif_queue_stopped(enet->netdev)) + netif_wake_queue(enet->netdev); + + return handled; + } + static int bcm4908_enet_change_mtu(struct net_device *netdev, int new_mtu) { struct bcm4908_enet *enet = netdev_priv(netdev); @@@ -621,6 -685,7 +686,7 @@@ static int bcm4908_enet_probe(struct pl struct device *dev = &pdev->dev; struct net_device *netdev; struct bcm4908_enet *enet; + const u8 *mac; int err;
netdev = devm_alloc_etherdev(dev, sizeof(*enet)); @@@ -641,6 -706,8 +707,8 @@@ if (netdev->irq < 0) return netdev->irq;
+ enet->irq_tx = platform_get_irq_byname(pdev, "tx"); + dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
err = bcm4908_enet_dma_alloc(enet); @@@ -648,12 -715,17 +716,17 @@@ return err;
SET_NETDEV_DEV(netdev, &pdev->dev); - eth_hw_addr_random(netdev); + mac = of_get_mac_address(dev->of_node); + if (!IS_ERR(mac)) + ether_addr_copy(netdev->dev_addr, mac); + else + eth_hw_addr_random(netdev); netdev->netdev_ops = &bcm4908_enet_netdev_ops; netdev->min_mtu = ETH_ZLEN; netdev->mtu = ETH_DATA_LEN; netdev->max_mtu = ENET_MTU_MAX; - netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64); + netif_tx_napi_add(netdev, &enet->tx_ring.napi, bcm4908_enet_poll_tx, NAPI_POLL_WEIGHT); + netif_napi_add(netdev, &enet->rx_ring.napi, bcm4908_enet_poll_rx, NAPI_POLL_WEIGHT);
err = register_netdev(netdev); if (err) { @@@ -671,7 -743,8 +744,8 @@@ static int bcm4908_enet_remove(struct p struct bcm4908_enet *enet = platform_get_drvdata(pdev);
unregister_netdev(enet->netdev); - netif_napi_del(&enet->napi); + netif_napi_del(&enet->rx_ring.napi); + netif_napi_del(&enet->tx_ring.napi); bcm4908_enet_dma_free(enet);
return 0; diff --combined drivers/net/ethernet/cadence/macb_main.c index 6e5cf490c01d,f56f3dbbc015..ffd56a23f8b0 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@@ -694,6 -694,22 +694,22 @@@ static void macb_mac_config(struct phyl if (old_ncr ^ ncr) macb_or_gem_writel(bp, NCR, ncr);
+ /* Disable AN for SGMII fixed link configuration, enable otherwise. + * Must be written after PCSSEL is set in NCFGR, + * otherwise writes will not take effect. + */ + if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) { + u32 pcsctrl, old_pcsctrl; + + old_pcsctrl = gem_readl(bp, PCSCNTRL); + if (mode == MLO_AN_FIXED) + pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG); + else + pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG); + if (old_pcsctrl != pcsctrl) + gem_writel(bp, PCSCNTRL, pcsctrl); + } + spin_unlock_irqrestore(&bp->lock, flags); }
@@@ -847,6 -863,15 +863,15 @@@ static int macb_phylink_connect(struct return 0; }
+ static void macb_get_pcs_fixed_state(struct phylink_config *config, + struct phylink_link_state *state) + { + struct net_device *ndev = to_net_dev(config->dev); + struct macb *bp = netdev_priv(ndev); + + state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0; + } + /* based on au1000_eth. c*/ static int macb_mii_probe(struct net_device *dev) { @@@ -855,6 -880,11 +880,11 @@@ bp->phylink_config.dev = &dev->dev; bp->phylink_config.type = PHYLINK_NETDEV;
+ if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { + bp->phylink_config.poll_fixed_state = true; + bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state; + } + bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode, bp->phy_interface, &macb_phylink_ops); if (IS_ERR(bp->phylink)) { @@@ -3239,9 -3269,6 +3269,9 @@@ static void gem_prog_cmp_regs(struct ma bool cmp_b = false; bool cmp_c = false;
+ if (!macb_is_gem(bp)) + return; + tp4sp_v = &(fs->h_u.tcp_ip4_spec); tp4sp_m = &(fs->m_u.tcp_ip4_spec);
@@@ -3610,7 -3637,6 +3640,7 @@@ static void macb_restore_features(struc { struct net_device *netdev = bp->dev; netdev_features_t features = netdev->features; + struct ethtool_rx_fs_item *item;
/* TX checksum offload */ macb_set_txcsum_feature(bp, features); @@@ -3619,9 -3645,6 +3649,9 @@@ macb_set_rxcsum_feature(bp, features);
/* RX Flow Filters */ + list_for_each_entry(item, &bp->rx_fs_list.list, list) + gem_prog_cmp_regs(bp, &item->fs); + macb_set_rxflow_feature(bp, features); }
@@@ -3735,17 -3758,15 +3765,15 @@@ static int macb_clk_init(struct platfor *hclk = devm_clk_get(&pdev->dev, "hclk"); }
- if (IS_ERR_OR_NULL(*pclk)) { - err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); - return err; - } + if (IS_ERR_OR_NULL(*pclk)) + return dev_err_probe(&pdev->dev, + IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV, + "failed to get pclk\n");
- if (IS_ERR_OR_NULL(*hclk)) { - err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); - return err; - } + if (IS_ERR_OR_NULL(*hclk)) + return dev_err_probe(&pdev->dev, + IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV, + "failed to get hclk\n");
*tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk"); if (IS_ERR(*tx_clk)) diff --combined drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 23a2ebdfd503,d2ba40c19696..a7f291c89702 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@@ -1794,25 -1794,11 +1794,25 @@@ int cudbg_collect_sge_indirect(struct c struct cudbg_buffer temp_buff = { 0 }; struct sge_qbase_reg_field *sge_qbase; struct ireg_buf *ch_sge_dbg; + u8 padap_running = 0; int i, rc; + u32 size;
- rc = cudbg_get_buff(pdbg_init, dbg_buff, - sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase), - &temp_buff); + /* Accessing SGE_QBASE_MAP[0-3] and SGE_QBASE_INDEX regs can + * lead to SGE missing doorbells under heavy traffic. So, only + * collect them when adapter is idle. + */ + for_each_port(padap, i) { + padap_running = netif_running(padap->port[i]); + if (padap_running) + break; + } + + size = sizeof(*ch_sge_dbg) * 2; + if (!padap_running) + size += sizeof(*sge_qbase); + + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc;
@@@ -1834,8 -1820,7 +1834,8 @@@ ch_sge_dbg++; }
- if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5 && + !padap_running) { sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg; /* 1 addr reg SGE_QBASE_INDEX and 4 data reg * SGE_QBASE_MAP[0-3] @@@ -3551,8 -3536,7 +3551,7 @@@ out }
out_free: - if (data) - kvfree(data); + kvfree(data);
#undef QDESC_GET_FLQ #undef QDESC_GET_RXQ diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2dd2af269b46,bc805d5fb16e..94adec64cb3b --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@@ -62,7 -62,7 +62,7 @@@ static void hclge_sync_vlan_filter(stru static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle); static void hclge_rfs_filter_expire(struct hclge_dev *hdev); - static void hclge_clear_arfs_rules(struct hnae3_handle *handle); + static int hclge_clear_arfs_rules(struct hclge_dev *hdev); static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr); static int hclge_set_default_loopback(struct hclge_dev *hdev); @@@ -70,6 -70,7 +70,7 @@@ static void hclge_sync_mac_table(struct hclge_dev *hdev); static void hclge_restore_hw_table(struct hclge_dev *hdev); static void hclge_sync_promisc_mode(struct hclge_dev *hdev); + static void hclge_sync_fd_table(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@@@ -384,36 -385,62 +385,62 @@@ static const struct key_info meta_data_ };
static const struct key_info tuple_key_info[] = { - { OUTER_DST_MAC, 48}, - { OUTER_SRC_MAC, 48}, - { OUTER_VLAN_TAG_FST, 16}, - { OUTER_VLAN_TAG_SEC, 16}, - { OUTER_ETH_TYPE, 16}, - { OUTER_L2_RSV, 16}, - { OUTER_IP_TOS, 8}, - { OUTER_IP_PROTO, 8}, - { OUTER_SRC_IP, 32}, - { OUTER_DST_IP, 32}, - { OUTER_L3_RSV, 16}, - { OUTER_SRC_PORT, 16}, - { OUTER_DST_PORT, 16}, - { OUTER_L4_RSV, 32}, - { OUTER_TUN_VNI, 24}, - { OUTER_TUN_FLOW_ID, 8}, - { INNER_DST_MAC, 48}, - { INNER_SRC_MAC, 48}, - { INNER_VLAN_TAG_FST, 16}, - { INNER_VLAN_TAG_SEC, 16}, - { INNER_ETH_TYPE, 16}, - { INNER_L2_RSV, 16}, - { INNER_IP_TOS, 8}, - { INNER_IP_PROTO, 8}, - { INNER_SRC_IP, 32}, - { INNER_DST_IP, 32}, - { INNER_L3_RSV, 16}, - { INNER_SRC_PORT, 16}, - { INNER_DST_PORT, 16}, - { INNER_L4_RSV, 32}, + { OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 }, + { OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 }, + { OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 }, + { INNER_DST_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.dst_mac), + offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) }, + { INNER_SRC_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.src_mac), + offsetof(struct hclge_fd_rule, tuples_mask.src_mac) }, + { INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.vlan_tag1), + offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) }, + { INNER_VLAN_TAG_SEC, 16, 
KEY_OPT_LE16, -1, -1 }, + { INNER_ETH_TYPE, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.ether_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) }, + { INNER_L2_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l2_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) }, + { INNER_IP_TOS, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_tos), + offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) }, + { INNER_IP_PROTO, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) }, + { INNER_SRC_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.src_ip), + offsetof(struct hclge_fd_rule, tuples_mask.src_ip) }, + { INNER_DST_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.dst_ip), + offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) }, + { INNER_L3_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l3_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) }, + { INNER_SRC_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.src_port), + offsetof(struct hclge_fd_rule, tuples_mask.src_port) }, + { INNER_DST_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.dst_port), + offsetof(struct hclge_fd_rule, tuples_mask.dst_port) }, + { INNER_L4_RSV, 32, KEY_OPT_LE32, + offsetof(struct hclge_fd_rule, tuples.l4_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) }, };
static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) @@@ -526,7 -553,6 +553,6 @@@ static int hclge_mac_update_stats(struc int ret;
ret = hclge_mac_query_reg_num(hdev, &desc_num); - /* The firmware supports the new statistics acquisition method */ if (!ret) ret = hclge_mac_update_stats_complete(hdev, desc_num); @@@ -751,12 -777,12 +777,12 @@@ static int hclge_get_sset_count(struct handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK; handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
- if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && - hdev->hw.mac.phydev->drv->set_loopback) { + if ((hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && + hdev->hw.mac.phydev->drv->set_loopback) || + hnae3_dev_phy_imp_supported(hdev)) { count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } - } else if (stringset == ETH_SS_STATS) { count = ARRAY_SIZE(g_mac_stats_string) + hclge_tqps_get_sset_count(handle, stringset); @@@ -1150,8 -1176,10 +1176,10 @@@ static void hclge_parse_fiber_link_mode if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac);
+ if (hnae3_dev_pause_supported(hdev)) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported); }
@@@ -1163,8 -1191,11 +1191,11 @@@ static void hclge_parse_backplane_link_ hclge_convert_setting_kr(mac, speed_ability); if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac); + + if (hnae3_dev_pause_supported(hdev)) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported); }
@@@ -1193,10 -1224,13 +1224,13 @@@ static void hclge_parse_copper_link_mod linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported); }
+ if (hnae3_dev_pause_supported(hdev)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); + } + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); }
static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability) @@@ -1256,9 -1290,6 +1290,6 @@@ static void hclge_parse_cfg(struct hclg req = (struct hclge_cfg_param_cmd *)desc[0].data;
/* get the configuration */ - cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]), - HCLGE_CFG_VMDQ_M, - HCLGE_CFG_VMDQ_S); cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S); cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), @@@ -1475,7 -1506,7 +1506,7 @@@ static void hclge_init_kdump_kernel_con "Running kdump kernel. Using minimal resources\n");
/* minimal queue pairs equals to the number of vports */ - hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + hdev->num_tqps = hdev->num_req_vfs + 1; hdev->num_tx_desc = HCLGE_MIN_TX_DESC; hdev->num_rx_desc = HCLGE_MIN_RX_DESC; } @@@ -1490,7 -1521,6 +1521,6 @@@ static int hclge_configure(struct hclge if (ret) return ret;
- hdev->num_vmdq_vport = cfg.vmdq_vport_num; hdev->base_tqp_pid = 0; hdev->vf_rss_size_max = cfg.vf_rss_size_max; hdev->pf_rss_size_max = cfg.pf_rss_size_max; @@@ -1741,7 -1771,7 +1771,7 @@@ static int hclge_map_tqp(struct hclge_d struct hclge_vport *vport = hdev->vport; u16 i, num_vport;
- num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1; for (i = 0; i < num_vport; i++) { int ret;
@@@ -1783,7 -1813,7 +1813,7 @@@ static int hclge_alloc_vport(struct hcl int ret;
/* We need to alloc a vport for main NIC of PF */ - num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1;
if (hdev->num_tqps < num_vport) { dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)", @@@ -2159,7 -2189,6 +2189,6 @@@ static int hclge_only_alloc_priv_buff(s COMPENSATE_HALF_MPS_NUM * half_mps; min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT); rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT); - if (rx_priv < min_rx_priv) return false;
@@@ -2188,7 -2217,7 +2217,7 @@@ /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev * @buf_alloc: pointer to buffer calculation data - * @return: 0: calculate sucessful, negative: fail + * @return: 0: calculate successful, negative: fail */ static int hclge_rx_buffer_calc(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) @@@ -2853,13 -2882,12 +2882,12 @@@ static int hclge_get_mac_phy_link(struc
static void hclge_update_link_status(struct hclge_dev *hdev) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; - struct hnae3_handle *rhandle; - struct hnae3_handle *handle; int state; int ret; - int i;
if (!client) return; @@@ -2874,25 -2902,23 +2902,23 @@@ }
if (state != hdev->hw.mac.link) { - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - handle = &hdev->vport[i].nic; - client->ops->link_status_change(handle, state); - hclge_config_mac_tnl_int(hdev, state); - rhandle = &hdev->vport[i].roce; - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, - state); - } + client->ops->link_status_change(handle, state); + hclge_config_mac_tnl_int(hdev, state); + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, state); + hdev->hw.mac.link = state; }
clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); }
- static void hclge_update_port_capability(struct hclge_mac *mac) + static void hclge_update_port_capability(struct hclge_dev *hdev, + struct hclge_mac *mac) { - /* update fec ability by speed */ - hclge_convert_setting_fec(mac); + if (hnae3_dev_fec_supported(hdev)) + /* update fec ability by speed */ + hclge_convert_setting_fec(mac);
/* firmware can not identify back plane type, the media type * read from configuration can help deal it @@@ -2984,6 -3010,141 +3010,141 @@@ static int hclge_get_sfp_info(struct hc return 0; }
+ static int hclge_get_phy_link_ksettings(struct hnae3_handle *handle, + struct ethtool_link_ksettings *cmd) + { + struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM]; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_phy_link_ksetting_0_cmd *req0; + struct hclge_phy_link_ksetting_1_cmd *req1; + u32 supported, advertising, lp_advertising; + struct hclge_dev *hdev = vport->back; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING, + true); + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get phy link ksetting, ret = %d.\n", ret); + return ret; + } + + req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data; + cmd->base.autoneg = req0->autoneg; + cmd->base.speed = le32_to_cpu(req0->speed); + cmd->base.duplex = req0->duplex; + cmd->base.port = req0->port; + cmd->base.transceiver = req0->transceiver; + cmd->base.phy_address = req0->phy_address; + cmd->base.eth_tp_mdix = req0->eth_tp_mdix; + cmd->base.eth_tp_mdix_ctrl = req0->eth_tp_mdix_ctrl; + supported = le32_to_cpu(req0->supported); + advertising = le32_to_cpu(req0->advertising); + lp_advertising = le32_to_cpu(req0->lp_advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); + + req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data; + cmd->base.master_slave_cfg = req1->master_slave_cfg; + cmd->base.master_slave_state = req1->master_slave_state; + + return 0; + } + + static int + hclge_set_phy_link_ksettings(struct hnae3_handle *handle, + const struct ethtool_link_ksettings *cmd) + { + struct hclge_desc 
desc[HCLGE_PHY_LINK_SETTING_BD_NUM]; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_phy_link_ksetting_0_cmd *req0; + struct hclge_phy_link_ksetting_1_cmd *req1; + struct hclge_dev *hdev = vport->back; + u32 advertising; + int ret; + + if (cmd->base.autoneg == AUTONEG_DISABLE && + ((cmd->base.speed != SPEED_100 && cmd->base.speed != SPEED_10) || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) + return -EINVAL; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING, + false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING, + false); + + req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data; + req0->autoneg = cmd->base.autoneg; + req0->speed = cpu_to_le32(cmd->base.speed); + req0->duplex = cmd->base.duplex; + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + req0->advertising = cpu_to_le32(advertising); + req0->eth_tp_mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data; + req1->master_slave_cfg = cmd->base.master_slave_cfg; + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to set phy link ksettings, ret = %d.\n", ret); + return ret; + } + + hdev->hw.mac.autoneg = cmd->base.autoneg; + hdev->hw.mac.speed = cmd->base.speed; + hdev->hw.mac.duplex = cmd->base.duplex; + linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising); + + return 0; + } + + static int hclge_update_tp_port_info(struct hclge_dev *hdev) + { + struct ethtool_link_ksettings cmd; + int ret; + + if (!hnae3_dev_phy_imp_supported(hdev)) + return 0; + + ret = hclge_get_phy_link_ksettings(&hdev->vport->nic, &cmd); + if (ret) + return ret; + + hdev->hw.mac.autoneg = cmd.base.autoneg; + hdev->hw.mac.speed = cmd.base.speed; + hdev->hw.mac.duplex = cmd.base.duplex; + + return 0; + } + + static int 
hclge_tp_port_init(struct hclge_dev *hdev) + { + struct ethtool_link_ksettings cmd; + + if (!hnae3_dev_phy_imp_supported(hdev)) + return 0; + + cmd.base.autoneg = hdev->hw.mac.autoneg; + cmd.base.speed = hdev->hw.mac.speed; + cmd.base.duplex = hdev->hw.mac.duplex; + linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising); + + return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd); + } + static int hclge_update_port_info(struct hclge_dev *hdev) { struct hclge_mac *mac = &hdev->hw.mac; @@@ -2992,7 -3153,7 +3153,7 @@@
/* get the port info from SFP cmd if not copper port */ if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER) - return 0; + return hclge_update_tp_port_info(hdev);
/* if IMP does not support get SFP/qSFP info, return directly */ if (!hdev->support_sfp_query) @@@ -3012,7 -3173,7 +3173,7 @@@
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { if (mac->speed_type == QUERY_ACTIVE_SPEED) { - hclge_update_port_capability(mac); + hclge_update_port_capability(hdev, mac); return 0; } return hclge_cfg_mac_speed_dup(hdev, mac->speed, @@@ -3197,7 -3358,7 +3358,7 @@@ static irqreturn_t hclge_misc_irq_handl * caused this event. Therefore, we will do below for now: * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we * have defered type of reset to be used. - * 2. Schedule the reset serivce task. + * 2. Schedule the reset service task. * 3. When service task receives HNAE3_UNKNOWN_RESET type it * will fetch the correct type of reset. This would be done * by first decoding the types of errors. @@@ -3325,8 -3486,9 +3486,9 @@@ static void hclge_misc_irq_uninit(struc int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *client = hdev->nic_client; - u16 i; + int ret;
if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client) return 0; @@@ -3334,27 -3496,20 +3496,20 @@@ if (!client->ops->reset_notify) return -EOPNOTSUPP;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].nic; - int ret; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify nic client failed %d(%d)\n", type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n", + type, ret);
- return 0; + return ret; }
static int hclge_notify_roce_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].roce; struct hnae3_client *client = hdev->roce_client; int ret; - u16 i;
if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) return 0; @@@ -3362,17 -3517,10 +3517,10 @@@ if (!client->ops->reset_notify) return -EOPNOTSUPP;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].roce; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify roce client failed %d(%d)", - type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)", + type, ret);
return ret; } @@@ -3440,7 -3588,7 +3588,7 @@@ static int hclge_set_all_vf_rst(struct { int i;
- for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) { + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; int ret;
@@@ -3521,14 -3669,12 +3669,12 @@@ void hclge_report_hw_error(struct hclge enum hnae3_hw_error_type type) { struct hnae3_client *client = hdev->nic_client; - u16 i;
if (!client || !client->ops->process_hw_error || !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state)) return;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) - client->ops->process_hw_error(&hdev->vport[i].nic, type); + client->ops->process_hw_error(&hdev->vport[0].nic, type); }
static void hclge_handle_imp_error(struct hclge_dev *hdev) @@@ -3794,6 -3940,21 +3940,21 @@@ static bool hclge_reset_err_handle(stru return false; }
+ static void hclge_update_reset_level(struct hclge_dev *hdev) + { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; + + /* if default_reset_request has a higher level reset request, + * it should be handled as soon as possible. since some errors + * need this kind of reset to fix. + */ + reset_level = hclge_get_reset_level(ae_dev, + &hdev->default_reset_request); + if (reset_level != HNAE3_NONE_RESET) + set_bit(reset_level, &hdev->reset_request); + } + static int hclge_set_rst_done(struct hclge_dev *hdev) { struct hclge_pf_rst_done_cmd *req; @@@ -3881,8 -4042,6 +4042,6 @@@ static int hclge_reset_prepare(struct h
static int hclge_reset_rebuild(struct hclge_dev *hdev) { - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - enum hnae3_reset_type reset_level; int ret;
hdev->rst_stats.hw_reset_done_cnt++; @@@ -3926,14 -4085,7 +4085,7 @@@ hdev->rst_stats.reset_done_cnt++; clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
- /* if default_reset_request has a higher level reset request, - * it should be handled as soon as possible. since some errors - * need this kind of reset to fix. - */ - reset_level = hclge_get_reset_level(ae_dev, - &hdev->default_reset_request); - if (reset_level != HNAE3_NONE_RESET) - set_bit(reset_level, &hdev->reset_request); + hclge_update_reset_level(hdev);
return 0; } @@@ -3966,6 -4118,7 +4118,6 @@@ static void hclge_reset_event(struct pc * normalcy is to reset. * 2. A new reset request from the stack due to timeout * - * For the first case,error event might not have ae handle available. * check if this is a new reset request and we are not here just because * last reset attempt did not succeed and watchdog hit us again. We will * know this if last reset request did not occur very recently (watchdog @@@ -3975,14 -4128,14 +4127,14 @@@ * want to make sure we throttle the reset request. Therefore, we will * not allow it again before 3*HZ times. */ - if (!handle) - handle = &hdev->vport[0].nic;
if (time_before(jiffies, (hdev->last_reset_time + HCLGE_RESET_INTERVAL))) { mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); return; - } else if (hdev->default_reset_request) { + } + + if (hdev->default_reset_request) { hdev->reset_level = hclge_get_reset_level(ae_dev, &hdev->default_reset_request); @@@ -4094,6 -4247,7 +4246,7 @@@ static void hclge_periodic_service_task hclge_update_link_status(hdev); hclge_sync_mac_table(hdev); hclge_sync_promisc_mode(hdev); + hclge_sync_fd_table(hdev);
if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { delta = jiffies - hdev->last_serv_processed; @@@ -4738,58 -4892,44 +4891,44 @@@ int hclge_rss_init_hw(struct hclge_dev
void hclge_rss_indir_init_cfg(struct hclge_dev *hdev) { - struct hclge_vport *vport = hdev->vport; - int i, j; + struct hclge_vport *vport = &hdev->vport[0]; + int i;
- for (j = 0; j < hdev->num_vmdq_vport + 1; j++) { - for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) - vport[j].rss_indirection_tbl[i] = - i % vport[j].alloc_rss_size; - } + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) + vport->rss_indirection_tbl[i] = i % vport->alloc_rss_size; }
static int hclge_rss_init_cfg(struct hclge_dev *hdev) { u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size; - int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; - struct hclge_vport *vport = hdev->vport; + int rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; + struct hclge_vport *vport = &hdev->vport[0]; + u16 *rss_ind_tbl;
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - u16 *rss_ind_tbl; - - vport[i].rss_tuple_sets.ipv4_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_sctp_en = - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv4_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_sctp_en = - hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? - HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv6_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - - vport[i].rss_algo = rss_algo; - - rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, - sizeof(*rss_ind_tbl), GFP_KERNEL); - if (!rss_ind_tbl) - return -ENOMEM; + vport->rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + + vport->rss_algo = rss_algo; + + rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, + sizeof(*rss_ind_tbl), GFP_KERNEL); + if (!rss_ind_tbl) + return -ENOMEM;
- vport[i].rss_indirection_tbl = rss_ind_tbl; - memcpy(vport[i].rss_hash_key, hclge_hash_key, - HCLGE_RSS_KEY_SIZE); - } + vport->rss_indirection_tbl = rss_ind_tbl; + memcpy(vport->rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE);
hclge_rss_indir_init_cfg(hdev);
@@@ -4995,6 -5135,285 +5134,285 @@@ static void hclge_request_update_promis set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); }
+ static void hclge_sync_fd_state(struct hclge_dev *hdev) + { + if (hlist_empty(&hdev->fd_rule_list)) + hdev->fd_active_type = HCLGE_FD_RULE_NONE; + } + + static void hclge_fd_inc_rule_cnt(struct hclge_dev *hdev, u16 location) + { + if (!test_bit(location, hdev->fd_bmap)) { + set_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num++; + } + } + + static void hclge_fd_dec_rule_cnt(struct hclge_dev *hdev, u16 location) + { + if (test_bit(location, hdev->fd_bmap)) { + clear_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num--; + } + } + + static void hclge_fd_free_node(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) + { + hlist_del(&rule->rule_node); + kfree(rule); + hclge_sync_fd_state(hdev); + } + + static void hclge_update_fd_rule_node(struct hclge_dev *hdev, + struct hclge_fd_rule *old_rule, + struct hclge_fd_rule *new_rule, + enum HCLGE_FD_NODE_STATE state) + { + switch (state) { + case HCLGE_FD_TO_ADD: + case HCLGE_FD_ACTIVE: + /* 1) if the new state is TO_ADD, just replace the old rule + * with the same location, no matter its state, because the + * new rule will be configured to the hardware. + * 2) if the new state is ACTIVE, it means the new rule + * has been configured to the hardware, so just replace + * the old rule node with the same location. + * 3) for it doesn't add a new node to the list, so it's + * unnecessary to update the rule number and fd_bmap. + */ + new_rule->rule_node.next = old_rule->rule_node.next; + new_rule->rule_node.pprev = old_rule->rule_node.pprev; + memcpy(old_rule, new_rule, sizeof(*old_rule)); + kfree(new_rule); + break; + case HCLGE_FD_DELETED: + hclge_fd_dec_rule_cnt(hdev, old_rule->location); + hclge_fd_free_node(hdev, old_rule); + break; + case HCLGE_FD_TO_DEL: + /* if new request is TO_DEL, and old rule is existent + * 1) the state of old rule is TO_DEL, we need do nothing, + * because we delete rule by location, other rule content + * is unncessary. 
+ * 2) the state of old rule is ACTIVE, we need to change its + * state to TO_DEL, so the rule will be deleted when periodic + * task being scheduled. + * 3) the state of old rule is TO_ADD, it means the rule hasn't + * been added to hardware, so we just delete the rule node from + * fd_rule_list directly. + */ + if (old_rule->state == HCLGE_FD_TO_ADD) { + hclge_fd_dec_rule_cnt(hdev, old_rule->location); + hclge_fd_free_node(hdev, old_rule); + return; + } + old_rule->state = HCLGE_FD_TO_DEL; + break; + } + } + + static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist, + u16 location, + struct hclge_fd_rule **parent) + { + struct hclge_fd_rule *rule; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + if (rule->location == location) + return rule; + else if (rule->location > location) + return NULL; + /* record the parent node, use to keep the nodes in fd_rule_list + * in ascend order. + */ + *parent = rule; + } + + return NULL; + } + + /* insert fd rule node in ascend order according to rule->location */ + static void hclge_fd_insert_rule_node(struct hlist_head *hlist, + struct hclge_fd_rule *rule, + struct hclge_fd_rule *parent) + { + INIT_HLIST_NODE(&rule->rule_node); + + if (parent) + hlist_add_behind(&rule->rule_node, &parent->rule_node); + else + hlist_add_head(&rule->rule_node, hlist); + } + + static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev, + struct hclge_fd_user_def_cfg *cfg) + { + struct hclge_fd_user_def_cfg_cmd *req; + struct hclge_desc desc; + u16 data = 0; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false); + + req = (struct hclge_fd_user_def_cfg_cmd *)desc.data; + + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset); + req->ol2_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0); + 
hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset); + req->ol3_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset); + req->ol4_cfg = cpu_to_le16(data); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to set fd user def data, ret= %d\n", ret); + return ret; + } + + static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev, bool locked) + { + int ret; + + if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state)) + return; + + if (!locked) + spin_lock_bh(&hdev->fd_rule_lock); + + ret = hclge_fd_set_user_def_cmd(hdev, hdev->fd_cfg.user_def_cfg); + if (ret) + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + + if (!locked) + spin_unlock_bh(&hdev->fd_rule_lock); + } + + static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) + { + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + struct hclge_fd_user_def_info *info, *old_info; + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return 0; + + /* for valid layer is start from 1, so need minus 1 to get the cfg */ + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + info = &rule->ep.user_def; + + if (!cfg->ref_cnt || cfg->offset == info->offset) + return 0; + + if (cfg->ref_cnt > 1) + goto error; + + fd_rule = hclge_find_fd_rule(hlist, rule->location, &parent); + if (fd_rule) { + old_info = &fd_rule->ep.user_def; + if (info->layer == old_info->layer) + return 0; + } + + error: + dev_err(&hdev->pdev->dev, + "No available offset for layer%d fd rule, each layer only support one user def offset.\n", + info->layer + 1); + return -ENOSPC; + } + + static void 
hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) + { + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + if (!cfg->ref_cnt) { + cfg->offset = rule->ep.user_def.offset; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } + cfg->ref_cnt++; + } + + static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) + { + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + if (!cfg->ref_cnt) + return; + + cfg->ref_cnt--; + if (!cfg->ref_cnt) { + cfg->offset = 0; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } + } + + static void hclge_update_fd_list(struct hclge_dev *hdev, + enum HCLGE_FD_NODE_STATE state, u16 location, + struct hclge_fd_rule *new_rule) + { + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + + fd_rule = hclge_find_fd_rule(hlist, location, &parent); + if (fd_rule) { + hclge_fd_dec_user_def_refcnt(hdev, fd_rule); + if (state == HCLGE_FD_ACTIVE) + hclge_fd_inc_user_def_refcnt(hdev, new_rule); + hclge_sync_fd_user_def_cfg(hdev, true); + + hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state); + return; + } + + /* it's unlikely to fail here, because we have checked the rule + * exist before. 
+ */ + if (unlikely(state == HCLGE_FD_TO_DEL || state == HCLGE_FD_DELETED)) { + dev_warn(&hdev->pdev->dev, + "failed to delete fd rule %u, it's inexistent\n", + location); + return; + } + + hclge_fd_inc_user_def_refcnt(hdev, new_rule); + hclge_sync_fd_user_def_cfg(hdev, true); + + hclge_fd_insert_rule_node(hlist, new_rule, parent); + hclge_fd_inc_rule_cnt(hdev, new_rule->location); + + if (state == HCLGE_FD_TO_ADD) { + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); + } + } + static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) { struct hclge_get_fd_mode_cmd *req; @@@ -5073,6 -5492,17 +5491,17 @@@ static int hclge_set_fd_key_config(stru return ret; }
+ static void hclge_fd_disable_user_def(struct hclge_dev *hdev) + { + struct hclge_fd_user_def_cfg *cfg = hdev->fd_cfg.user_def_cfg; + + spin_lock_bh(&hdev->fd_rule_lock); + memset(cfg, 0, sizeof(hdev->fd_cfg.user_def_cfg)); + spin_unlock_bh(&hdev->fd_rule_lock); + + hclge_fd_set_user_def_cmd(hdev, cfg); + } + static int hclge_init_fd_config(struct hclge_dev *hdev) { #define LOW_2_WORDS 0x03 @@@ -5113,9 -5543,12 +5542,12 @@@ BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* If use max 400bit key, we can support tuples for ether type */ - if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) + if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) { key_cfg->tuple_active |= BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC); + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + }
/* roce_type is used to filter roce frames * dst_vport is used to specify the rule @@@ -5224,96 -5657,57 +5656,57 @@@ static int hclge_fd_ad_config(struct hc static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, struct hclge_fd_rule *rule) { + int offset, moffset, ip_offset; + enum HCLGE_FD_KEY_OPT key_opt; u16 tmp_x_s, tmp_y_s; u32 tmp_x_l, tmp_y_l; + u8 *p = (u8 *)rule; int i;
- if (rule->unused_tuple & tuple_bit) + if (rule->unused_tuple & BIT(tuple_bit)) return true;
- switch (tuple_bit) { - case BIT(INNER_DST_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - } + key_opt = tuple_key_info[tuple_bit].key_opt; + offset = tuple_key_info[tuple_bit].offset; + moffset = tuple_key_info[tuple_bit].moffset;
- return true; - case BIT(INNER_SRC_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - } + switch (key_opt) { + case KEY_OPT_U8: + calc_x(*key_x, p[offset], p[moffset]); + calc_y(*key_y, p[offset], p[moffset]);
return true; - case BIT(INNER_VLAN_TAG_FST): - calc_x(tmp_x_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); - calc_y(tmp_y_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); + case KEY_OPT_LE16: + calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); + calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); *(__le16 *)key_x = cpu_to_le16(tmp_x_s); *(__le16 *)key_y = cpu_to_le16(tmp_y_s);
return true; - case BIT(INNER_ETH_TYPE): - calc_x(tmp_x_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - calc_y(tmp_y_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); - - return true; - case BIT(INNER_IP_TOS): - calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - - return true; - case BIT(INNER_IP_PROTO): - calc_x(*key_x, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - calc_y(*key_y, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - - return true; - case BIT(INNER_SRC_IP): - calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - *(__le32 *)key_x = cpu_to_le32(tmp_x_l); - *(__le32 *)key_y = cpu_to_le32(tmp_y_l); - - return true; - case BIT(INNER_DST_IP): - calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); + case KEY_OPT_LE32: + calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true; - case BIT(INNER_SRC_PORT): - calc_x(tmp_x_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - calc_y(tmp_y_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_MAC: + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + }
return true; - case BIT(INNER_DST_PORT): - calc_x(tmp_x_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - calc_y(tmp_y_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_IP: + ip_offset = IPV4_INDEX * sizeof(u32); + calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + *(__le32 *)key_x = cpu_to_le32(tmp_x_l); + *(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true; default: @@@ -5401,12 -5795,12 +5794,12 @@@ static int hclge_config_key(struct hclg
for (i = 0 ; i < MAX_TUPLE; i++) { bool tuple_valid; - u32 check_tuple;
tuple_size = tuple_key_info[i].key_length / 8; - check_tuple = key_cfg->tuple_active & BIT(i); + if (!(key_cfg->tuple_active & BIT(i))) + continue;
- tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x, + tuple_valid = hclge_fd_convert_tuple(i, cur_key_x, cur_key_y, rule); if (tuple_valid) { cur_key_x += tuple_size; @@@ -5537,8 -5931,7 +5930,7 @@@ static int hclge_fd_check_tcpip6_tuple( if (!spec || !unused_tuple) return -EINVAL;
- *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS); + *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
/* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@@ -5553,8 -5946,8 +5945,8 @@@ if (!spec->pdst) *unused_tuple |= BIT(INNER_DST_PORT);
- if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS);
return 0; } @@@ -5566,7 -5959,7 +5958,7 @@@ static int hclge_fd_check_ip6_tuple(str return -EINVAL;
*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); + BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@@ -5578,8 -5971,8 +5970,8 @@@ if (!spec->l4_proto) *unused_tuple |= BIT(INNER_IP_PROTO);
- if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS);
if (spec->l4_4_bytes) return -EOPNOTSUPP; @@@ -5649,9 -6042,98 +6041,98 @@@ static int hclge_fd_check_ext_tuple(str return 0; }
+ static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple, + struct hclge_fd_user_def_info *info) + { + switch (flow_type) { + case ETHER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L2; + *unused_tuple &= ~BIT(INNER_L2_RSV); + break; + case IP_USER_FLOW: + case IPV6_USER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L3; + *unused_tuple &= ~BIT(INNER_L3_RSV); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + info->layer = HCLGE_FD_USER_DEF_L4; + *unused_tuple &= ~BIT(INNER_L4_RSV); + break; + default: + return -EOPNOTSUPP; + } + + return 0; + } + + static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs) + { + return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0; + } + + static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) + { + u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active; + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + u16 data, offset, data_mask, offset_mask; + int ret; + + info->layer = HCLGE_FD_USER_DEF_NONE; + *unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + + if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs)) + return 0; + + /* user-def data from ethtool is 64 bit value, the bit0~15 is used + * for data, and bit32~47 is used for offset. 
+ */ + data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + + if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) { + dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); + return -EOPNOTSUPP; + } + + if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) { + dev_err(&hdev->pdev->dev, + "user-def offset[%u] should be no more than %u\n", + offset, HCLGE_FD_MAX_USER_DEF_OFFSET); + return -EINVAL; + } + + if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) { + dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n"); + return -EINVAL; + } + + ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info); + if (ret) { + dev_err(&hdev->pdev->dev, + "unsupported flow type for user-def bytes, ret = %d\n", + ret); + return ret; + } + + info->data = data; + info->data_mask = data_mask; + info->offset = offset; + + return 0; + } + static int hclge_fd_check_spec(struct hclge_dev *hdev, struct ethtool_rx_flow_spec *fs, - u32 *unused_tuple) + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) { u32 flow_type; int ret; @@@ -5664,11 -6146,9 +6145,9 @@@ return -EINVAL; }
- if ((fs->flow_type & FLOW_EXT) && - (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) { - dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); - return -EOPNOTSUPP; - } + ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info); + if (ret) + return ret;
flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); switch (flow_type) { @@@ -5720,217 -6200,194 +6199,194 @@@ return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple); }
- static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location) + static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - struct hclge_fd_rule *rule = NULL; - struct hlist_node *node2; + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
- spin_lock_bh(&hdev->fd_rule_lock); - hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - } + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
- spin_unlock_bh(&hdev->fd_rule_lock); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
- return rule && rule->location == location; - } + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
- /* make sure being called after lock up with fd_rule_lock */ - static int hclge_fd_update_rule_list(struct hclge_dev *hdev, - struct hclge_fd_rule *new_rule, - u16 location, - bool is_add) - { - struct hclge_fd_rule *rule = NULL, *parent = NULL; - struct hlist_node *node2; + rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
- if (is_add && !new_rule) - return -EINVAL; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF;
- hlist_for_each_entry_safe(rule, node2, - &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - parent = rule; - } - - if (rule && rule->location == location) { - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; - - if (!is_add) { - if (!hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_RULE_NONE; - clear_bit(location, hdev->fd_bmap); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; + }
- return 0; - } - } else if (!is_add) { - dev_err(&hdev->pdev->dev, - "delete fail, rule %u is inexistent\n", - location); - return -EINVAL; - } + static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) + { + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
- INIT_HLIST_NODE(&new_rule->rule_node); + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
- if (parent) - hlist_add_behind(&new_rule->rule_node, &parent->rule_node); - else - hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list); + rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
- set_bit(location, hdev->fd_bmap); - hdev->hclge_fd_rule_num++; - hdev->fd_active_type = new_rule->rule_type; + rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
- return 0; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF; }
- static int hclge_fd_get_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, - struct hclge_fd_rule *rule) + static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); - - switch (flow_type) { - case SCTP_V4_FLOW: - case TCP_V4_FLOW: - case UDP_V4_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src); + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src, + IPV6_SIZE);
- rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst); + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE);
- rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
- rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst); + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
- rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos; + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF;
- rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
- break; - case IP_USER_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src); - - rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; + }
- rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos; + static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) + { + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src, + IPV6_SIZE);
- rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto; + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst, + IPV6_SIZE);
- rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
- break; - case SCTP_V6_FLOW: - case TCP_V6_FLOW: - case UDP_V6_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE); + rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
- be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF; + }
- rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc); + static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) + { + ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source); + ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source);
- rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst); + ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest); + ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest);
- rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto); + rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto); + }
+ static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info, + struct hclge_fd_rule *rule) + { + switch (info->layer) { + case HCLGE_FD_USER_DEF_L2: + rule->tuples.l2_user_def = info->data; + rule->tuples_mask.l2_user_def = info->data_mask; break; - case IPV6_USER_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE); - - be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - - rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto; - - rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; - + case HCLGE_FD_USER_DEF_L3: + rule->tuples.l3_user_def = info->data; + rule->tuples_mask.l3_user_def = info->data_mask; break; - case ETHER_FLOW: - ether_addr_copy(rule->tuples.src_mac, - fs->h_u.ether_spec.h_source); - ether_addr_copy(rule->tuples_mask.src_mac, - fs->m_u.ether_spec.h_source); - - ether_addr_copy(rule->tuples.dst_mac, - fs->h_u.ether_spec.h_dest); - ether_addr_copy(rule->tuples_mask.dst_mac, - fs->m_u.ether_spec.h_dest); - - rule->tuples.ether_proto = - be16_to_cpu(fs->h_u.ether_spec.h_proto); - rule->tuples_mask.ether_proto = - be16_to_cpu(fs->m_u.ether_spec.h_proto); - + case HCLGE_FD_USER_DEF_L4: + rule->tuples.l4_user_def = (u32)info->data << 16; + rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16; break; default: - return -EOPNOTSUPP; + break; }
+ rule->ep.user_def = *info; + } + + static int hclge_fd_get_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, + struct hclge_fd_user_def_info *info) + { + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + switch (flow_type) { case SCTP_V4_FLOW: - case SCTP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_SCTP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP); break; case TCP_V4_FLOW: - case TCP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_TCP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP); break; case UDP_V4_FLOW: + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP); + break; + case IP_USER_FLOW: + hclge_fd_get_ip4_tuple(hdev, fs, rule); + break; + case SCTP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP); + break; + case TCP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP); + break; case UDP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_UDP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP); break; - default: + case IPV6_USER_FLOW: + hclge_fd_get_ip6_tuple(hdev, fs, rule); + break; + case ETHER_FLOW: + hclge_fd_get_ether_tuple(hdev, fs, rule); break; + default: + return -EOPNOTSUPP; }
if (fs->flow_type & FLOW_EXT) { rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci); rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci); + hclge_fd_get_user_def_tuple(info, rule); }
if (fs->flow_type & FLOW_MAC_EXT) { @@@ -5941,33 -6398,53 +6397,53 @@@ return 0; }
- /* make sure being called after lock up with fd_rule_lock */ static int hclge_fd_config_rule(struct hclge_dev *hdev, struct hclge_fd_rule *rule) { int ret;
- if (!rule) { + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + return ret; + + return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); + } + + static int hclge_add_fd_entry_common(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) + { + int ret; + + spin_lock_bh(&hdev->fd_rule_lock); + + if (hdev->fd_active_type != rule->rule_type && + (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) { dev_err(&hdev->pdev->dev, - "The flow director rule is NULL\n"); + "mode conflict(new type %d, active type %d), please delete existent rules first\n", + rule->rule_type, hdev->fd_active_type); + spin_unlock_bh(&hdev->fd_rule_lock); return -EINVAL; }
- /* it will never fail here, so needn't to check return value */ - hclge_fd_update_rule_list(hdev, rule, rule->location, true); + ret = hclge_fd_check_user_def_refcnt(hdev, rule); + if (ret) + goto out;
- ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + ret = hclge_clear_arfs_rules(hdev); if (ret) - goto clear_rule; + goto out;
- ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); + ret = hclge_fd_config_rule(hdev, rule); if (ret) - goto clear_rule; + goto out;
- return 0; + rule->state = HCLGE_FD_ACTIVE; + hdev->fd_active_type = rule->rule_type; + hclge_update_fd_list(hdev, rule->state, rule->location, rule);
- clear_rule: - hclge_fd_update_rule_list(hdev, rule, rule->location, false); + out: + spin_unlock_bh(&hdev->fd_rule_lock); return ret; }
@@@ -5979,11 -6456,48 +6455,48 @@@ static bool hclge_is_cls_flower_active( return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE; }
+ static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie, + u16 *vport_id, u8 *action, u16 *queue_id) + { + struct hclge_vport *vport = hdev->vport; + + if (ring_cookie == RX_CLS_FLOW_DISC) { + *action = HCLGE_FD_ACTION_DROP_PACKET; + } else { + u32 ring = ethtool_get_flow_spec_ring(ring_cookie); + u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie); + u16 tqps; + + if (vf > hdev->num_req_vfs) { + dev_err(&hdev->pdev->dev, + "Error: vf id (%u) > max vf num (%u)\n", + vf, hdev->num_req_vfs); + return -EINVAL; + } + + *vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; + tqps = hdev->vport[vf].nic.kinfo.num_tqps; + + if (ring >= tqps) { + dev_err(&hdev->pdev->dev, + "Error: queue id (%u) > max tqp num (%u)\n", + ring, tqps - 1); + return -EINVAL; + } + + *action = HCLGE_FD_ACTION_SELECT_QUEUE; + *queue_id = ring; + } + + return 0; + } + static int hclge_add_fd_entry(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + struct hclge_fd_user_def_info info; u16 dst_vport_id = 0, q_index = 0; struct ethtool_rx_flow_spec *fs; struct hclge_fd_rule *rule; @@@ -6003,51 -6517,22 +6516,22 @@@ return -EOPNOTSUPP; }
- if (hclge_is_cls_flower_active(handle)) { - dev_err(&hdev->pdev->dev, - "please delete all exist cls flower rules first\n"); - return -EINVAL; - } - fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
- ret = hclge_fd_check_spec(hdev, fs, &unused); + ret = hclge_fd_check_spec(hdev, fs, &unused, &info); if (ret) return ret;
- if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = HCLGE_FD_ACTION_DROP_PACKET; - } else { - u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); - u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - u16 tqps; - - if (vf > hdev->num_req_vfs) { - dev_err(&hdev->pdev->dev, - "Error: vf id (%u) > max vf num (%u)\n", - vf, hdev->num_req_vfs); - return -EINVAL; - } - - dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; - tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps; - - if (ring >= tqps) { - dev_err(&hdev->pdev->dev, - "Error: queue id (%u) > max tqp num (%u)\n", - ring, tqps - 1); - return -EINVAL; - } - - action = HCLGE_FD_ACTION_SELECT_QUEUE; - q_index = ring; - } + ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id, + &action, &q_index); + if (ret) + return ret;
rule = kzalloc(sizeof(*rule), GFP_KERNEL); if (!rule) return -ENOMEM;
- ret = hclge_fd_get_tuple(hdev, fs, rule); + ret = hclge_fd_get_tuple(hdev, fs, rule, &info); if (ret) { kfree(rule); return ret; @@@ -6061,15 -6546,9 +6545,9 @@@ rule->action = action; rule->rule_type = HCLGE_FD_EP_ACTIVE;
- /* to avoid rule conflict, when user configure rule by ethtool, - * we need to clear all arfs rules - */ - spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule);
return ret; } @@@ -6090,32 -6569,30 +6568,30 @@@ static int hclge_del_fd_entry(struct hn if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) return -EINVAL;
- if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num || - !hclge_fd_rule_exist(hdev, fs->location)) { + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + !test_bit(fs->location, hdev->fd_bmap)) { dev_err(&hdev->pdev->dev, "Delete fail, rule %u is inexistent\n", fs->location); + spin_unlock_bh(&hdev->fd_rule_lock); return -ENOENT; }
ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location, NULL, false); if (ret) - return ret; + goto out;
- spin_lock_bh(&hdev->fd_rule_lock); - ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false); + hclge_update_fd_list(hdev, HCLGE_FD_DELETED, fs->location, NULL);
+ out: spin_unlock_bh(&hdev->fd_rule_lock); - return ret; }
- /* make sure being called after lock up with fd_rule_lock */ - static void hclge_del_all_fd_entries(struct hnae3_handle *handle, - bool clear_list) + static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev, + bool clear_list) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; u16 location; @@@ -6123,6 -6600,8 +6599,8 @@@ if (!hnae3_dev_fd_supported(hdev)) return;
+ spin_lock_bh(&hdev->fd_rule_lock); + for_each_set_bit(location, hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location, @@@ -6139,6 -6618,14 +6617,14 @@@ bitmap_zero(hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); } + + spin_unlock_bh(&hdev->fd_rule_lock); + } + + static void hclge_del_all_fd_entries(struct hclge_dev *hdev) + { + hclge_clear_fd_rules_in_list(hdev, true); + hclge_fd_disable_user_def(hdev); }
static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@@ -6147,7 -6634,6 +6633,6 @@@ struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; - int ret;
/* Return ok here, because reset error handling will check this * return value. If error is returned here, the reset process will @@@ -6162,25 -6648,11 +6647,11 @@@
spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (!ret) - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - - if (ret) { - dev_warn(&hdev->pdev->dev, - "Restore rule %u failed, remove it\n", - rule->location); - clear_bit(rule->location, hdev->fd_bmap); - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; - } + if (rule->state == HCLGE_FD_ACTIVE) + rule->state = HCLGE_FD_TO_ADD; } - - if (hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_EP_ACTIVE; - spin_unlock_bh(&hdev->fd_rule_lock); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
return 0; } @@@ -6268,6 -6740,10 +6739,10 @@@ static void hclge_fd_get_tcpip6_info(st cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE);
+ spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->psrc = cpu_to_be16(rule->tuples.src_port); spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ? 0 : cpu_to_be16(rule->tuples_mask.src_port); @@@ -6295,6 -6771,10 +6770,10 @@@ static void hclge_fd_get_ip6_info(struc cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE);
+ spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->l4_proto = rule->tuples.ip_proto; spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ? 0 : rule->tuples_mask.ip_proto; @@@ -6322,6 -6802,24 +6801,24 @@@ static void hclge_fd_get_ether_info(str 0 : cpu_to_be16(rule->tuples_mask.ether_proto); }
+ static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) + { + if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) == + HCLGE_FD_TUPLE_USER_DEF_TUPLES) { + fs->h_ext.data[0] = 0; + fs->h_ext.data[1] = 0; + fs->m_ext.data[0] = 0; + fs->m_ext.data[1] = 0; + } else { + fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset); + fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data); + fs->m_ext.data[0] = + cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK); + fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask); + } + } + static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { @@@ -6330,6 -6828,8 +6827,8 @@@ fs->m_ext.vlan_tci = rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ? 0 : cpu_to_be16(rule->tuples_mask.vlan_tag1); + + hclge_fd_get_user_def_info(fs, rule); }
if (fs->flow_type & FLOW_MAC_EXT) { @@@ -6441,6 -6941,9 +6940,9 @@@ static int hclge_get_all_rules(struct h return -EMSGSIZE; }
+ if (rule->state == HCLGE_FD_TO_DEL) + continue; + rule_locs[cnt] = rule->location; cnt++; } @@@ -6500,6 -7003,7 +7002,7 @@@ static void hclge_fd_build_arfs_rule(co rule->action = 0; rule->vf_id = 0; rule->rule_type = HCLGE_FD_ARFS_ACTIVE; + rule->state = HCLGE_FD_TO_ADD; if (tuples->ether_proto == ETH_P_IP) { if (tuples->ip_proto == IPPROTO_TCP) rule->flow_type = TCP_V4_FLOW; @@@ -6522,9 -7026,7 +7025,7 @@@ static int hclge_add_fd_entry_by_arfs(s struct hclge_fd_rule_tuples new_tuples = {}; struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; - u16 tmp_queue_id; u16 bit_id; - int ret;
if (!hnae3_dev_fd_supported(hdev)) return -EOPNOTSUPP; @@@ -6560,34 -7062,19 +7061,19 @@@ return -ENOMEM; }
- set_bit(bit_id, hdev->fd_bmap); rule->location = bit_id; rule->arfs.flow_id = flow_id; rule->queue_id = queue_id; hclge_fd_build_arfs_rule(&new_tuples, rule); - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) - return ret; - - return rule->location; + hclge_update_fd_list(hdev, rule->state, rule->location, rule); + hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE; + } else if (rule->queue_id != queue_id) { + rule->queue_id = queue_id; + rule->state = HCLGE_FD_TO_ADD; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); } - spin_unlock_bh(&hdev->fd_rule_lock); - - if (rule->queue_id == queue_id) - return rule->location; - - tmp_queue_id = rule->queue_id; - rule->queue_id = queue_id; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) { - rule->queue_id = tmp_queue_id; - return ret; - } - return rule->location; }
@@@ -6597,7 -7084,6 +7083,6 @@@ static void hclge_rfs_filter_expire(str struct hnae3_handle *handle = &hdev->vport[0].nic; struct hclge_fd_rule *rule; struct hlist_node *node; - HLIST_HEAD(del_list);
spin_lock_bh(&hdev->fd_rule_lock); if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) { @@@ -6605,34 -7091,51 +7090,51 @@@ return; } hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rule->state != HCLGE_FD_ACTIVE) + continue; if (rps_may_expire_flow(handle->netdev, rule->queue_id, rule->arfs.flow_id, rule->location)) { - hlist_del_init(&rule->rule_node); - hlist_add_head(&rule->rule_node, &del_list); - hdev->hclge_fd_rule_num--; - clear_bit(rule->location, hdev->fd_bmap); - } - } - spin_unlock_bh(&hdev->fd_rule_lock); - - hlist_for_each_entry_safe(rule, node, &del_list, rule_node) { - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - rule->location, NULL, false); - kfree(rule); + rule->state = HCLGE_FD_TO_DEL; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + } } + spin_unlock_bh(&hdev->fd_rule_lock); #endif }
/* make sure being called after lock up with fd_rule_lock */ - static void hclge_clear_arfs_rules(struct hnae3_handle *handle) + static int hclge_clear_arfs_rules(struct hclge_dev *hdev) { #ifdef CONFIG_RFS_ACCEL - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret; + + if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) + return 0; + + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_DEL: + case HCLGE_FD_ACTIVE: + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) + return ret; + fallthrough; + case HCLGE_FD_TO_ADD: + hclge_fd_dec_rule_cnt(hdev, rule->location); + hlist_del(&rule->rule_node); + kfree(rule); + break; + default: + break; + } + } + hclge_sync_fd_state(hdev);
- if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE) - hclge_del_all_fd_entries(handle, true); #endif + return 0; }
static void hclge_get_cls_key_basic(const struct flow_rule *flow, @@@ -6814,12 -7317,6 +7316,6 @@@ static int hclge_add_cls_flower(struct struct hclge_fd_rule *rule; int ret;
- if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { - dev_err(&hdev->pdev->dev, - "please remove all exist fd rules via ethtool first\n"); - return -EINVAL; - } - ret = hclge_check_cls_flower(hdev, cls_flower, tc); if (ret) { dev_err(&hdev->pdev->dev, @@@ -6832,8 -7329,10 +7328,10 @@@ return -ENOMEM;
ret = hclge_parse_cls_flower(hdev, cls_flower, rule); - if (ret) - goto err; + if (ret) { + kfree(rule); + return ret; + }
rule->action = HCLGE_FD_ACTION_SELECT_TC; rule->cls_flower.tc = tc; @@@ -6842,22 -7341,10 +7340,10 @@@ rule->cls_flower.cookie = cls_flower->cookie; rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE;
- spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to add cls flower rule, ret = %d\n", ret); - goto err; - } + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule);
- return 0; - err: - kfree(rule); return ret; }
@@@ -6894,25 -7381,66 +7380,66 @@@ static int hclge_del_cls_flower(struct ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location, NULL, false); if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u, ret = %d\n", - rule->location, ret); spin_unlock_bh(&hdev->fd_rule_lock); return ret; }
- ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false); - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u in list, ret = %d\n", - rule->location, ret); - spin_unlock_bh(&hdev->fd_rule_lock); - return ret; + hclge_update_fd_list(hdev, HCLGE_FD_DELETED, rule->location, NULL); + spin_unlock_bh(&hdev->fd_rule_lock); + + return 0; + } + + static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist) + { + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret = 0; + + if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state)) + return; + + spin_lock_bh(&hdev->fd_rule_lock); + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_ADD: + ret = hclge_fd_config_rule(hdev, rule); + if (ret) + goto out; + rule->state = HCLGE_FD_ACTIVE; + break; + case HCLGE_FD_TO_DEL: + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) + goto out; + hclge_fd_dec_rule_cnt(hdev, rule->location); + hclge_fd_free_node(hdev, rule); + break; + default: + break; + } }
+ out: + if (ret) + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + spin_unlock_bh(&hdev->fd_rule_lock); + }
- return 0; + static void hclge_sync_fd_table(struct hclge_dev *hdev) + { + if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) { + bool clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; + + hclge_clear_fd_rules_in_list(hdev, clear_list); + } + + hclge_sync_fd_user_def_cfg(hdev, false); + + hclge_sync_fd_list(hdev, &hdev->fd_rule_list); }
static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle) @@@ -6952,18 -7480,15 +7479,15 @@@ static void hclge_enable_fd(struct hnae { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - bool clear;
hdev->fd_en = enable; - clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
- if (!enable) { - spin_lock_bh(&hdev->fd_rule_lock); - hclge_del_all_fd_entries(handle, clear); - spin_unlock_bh(&hdev->fd_rule_lock); - } else { + if (!enable) + set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state); + else hclge_restore_fd_entries(handle); - } + + hclge_task_schedule(hdev, 0); }
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) @@@ -7124,19 -7649,19 +7648,19 @@@ static int hclge_set_app_loopback(struc return ret; }
- static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en, + static int hclge_cfg_common_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { - #define HCLGE_SERDES_RETRY_MS 10 - #define HCLGE_SERDES_RETRY_NUM 100 + #define HCLGE_COMMON_LB_RETRY_MS 10 + #define HCLGE_COMMON_LB_RETRY_NUM 100
- struct hclge_serdes_lb_cmd *req; + struct hclge_common_lb_cmd *req; struct hclge_desc desc; int ret, i = 0; u8 loop_mode_b;
- req = (struct hclge_serdes_lb_cmd *)desc.data; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false); + req = (struct hclge_common_lb_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, false);
switch (loop_mode) { case HNAE3_LOOP_SERIAL_SERDES: @@@ -7145,9 -7670,12 +7669,12 @@@ case HNAE3_LOOP_PARALLEL_SERDES: loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B; break; + case HNAE3_LOOP_PHY: + loop_mode_b = HCLGE_CMD_GE_PHY_INNER_LOOP_B; + break; default: dev_err(&hdev->pdev->dev, - "unsupported serdes loopback mode %d\n", loop_mode); + "unsupported common loopback mode %d\n", loop_mode); return -ENOTSUPP; }
@@@ -7161,39 -7689,39 +7688,39 @@@ ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "serdes loopback set fail, ret = %d\n", ret); + "common loopback set fail, ret = %d\n", ret); return ret; }
do { - msleep(HCLGE_SERDES_RETRY_MS); - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, + msleep(HCLGE_COMMON_LB_RETRY_MS); + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "serdes loopback get, ret = %d\n", ret); + "common loopback get, ret = %d\n", ret); return ret; } - } while (++i < HCLGE_SERDES_RETRY_NUM && - !(req->result & HCLGE_CMD_SERDES_DONE_B)); + } while (++i < HCLGE_COMMON_LB_RETRY_NUM && + !(req->result & HCLGE_CMD_COMMON_LB_DONE_B));
- if (!(req->result & HCLGE_CMD_SERDES_DONE_B)) { - dev_err(&hdev->pdev->dev, "serdes loopback set timeout\n"); + if (!(req->result & HCLGE_CMD_COMMON_LB_DONE_B)) { + dev_err(&hdev->pdev->dev, "common loopback set timeout\n"); return -EBUSY; - } else if (!(req->result & HCLGE_CMD_SERDES_SUCCESS_B)) { - dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n"); + } else if (!(req->result & HCLGE_CMD_COMMON_LB_SUCCESS_B)) { + dev_err(&hdev->pdev->dev, "common loopback set failed in fw\n"); return -EIO; } return ret; }
- static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, + static int hclge_set_common_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { int ret;
- ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode); + ret = hclge_cfg_common_loopback(hdev, en, loop_mode); if (ret) return ret;
@@@ -7242,8 -7770,12 +7769,12 @@@ static int hclge_set_phy_loopback(struc struct phy_device *phydev = hdev->hw.mac.phydev; int ret;
- if (!phydev) + if (!phydev) { + if (hnae3_dev_phy_imp_supported(hdev)) + return hclge_set_common_loopback(hdev, en, + HNAE3_LOOP_PHY); return -ENOTSUPP; + }
if (en) ret = hclge_enable_phy_loopback(hdev, phydev); @@@ -7265,13 -7797,12 +7796,12 @@@ return ret; }
- static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) + static int hclge_tqp_enable_cmd_send(struct hclge_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclge_desc desc; struct hclge_cfg_com_tqp_queue_cmd *req = (struct hclge_cfg_com_tqp_queue_cmd *)desc.data; - int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); req->tqp_id = cpu_to_le16(tqp_id); @@@ -7279,20 -7810,30 +7809,30 @@@ if (enable) req->enable |= 1U << HCLGE_TQP_ENABLE_B;
- ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) - dev_err(&hdev->pdev->dev, - "Tqp enable fail, status =%d.\n", ret); - return ret; + return hclge_cmd_send(&hdev->hw, &desc, 1); + } + + static int hclge_tqp_enable(struct hnae3_handle *handle, bool enable) + { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclge_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + return 0; }
static int hclge_set_loopback(struct hnae3_handle *handle, enum hnae3_loop loop_mode, bool en) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hnae3_knic_private_info *kinfo; struct hclge_dev *hdev = vport->back; - int i, ret; + int ret;
/* Loopback can be enabled in three places: SSU, MAC, and serdes. By * default, SSU loopback is enabled, so if the SMAC and the DMAC are @@@ -7314,7 -7855,7 +7854,7 @@@ break; case HNAE3_LOOP_SERIAL_SERDES: case HNAE3_LOOP_PARALLEL_SERDES: - ret = hclge_set_serdes_loopback(hdev, en, loop_mode); + ret = hclge_set_common_loopback(hdev, en, loop_mode); break; case HNAE3_LOOP_PHY: ret = hclge_set_phy_loopback(hdev, en); @@@ -7329,14 -7870,12 +7869,12 @@@ if (ret) return ret;
- kinfo = &vport->nic.kinfo; - for (i = 0; i < kinfo->num_tqps; i++) { - ret = hclge_tqp_enable(hdev, i, 0, en); - if (ret) - return ret; - } + ret = hclge_tqp_enable(handle, en); + if (ret) + dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n", + en ? "enable" : "disable", ret);
- return 0; + return ret; }
static int hclge_set_default_loopback(struct hclge_dev *hdev) @@@ -7347,11 -7886,11 +7885,11 @@@ if (ret) return ret;
- ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); + ret = hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); if (ret) return ret;
- return hclge_cfg_serdes_loopback(hdev, false, + return hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_PARALLEL_SERDES); }
@@@ -7423,11 -7962,10 +7961,10 @@@ static void hclge_ae_stop(struct hnae3_ { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int i;
set_bit(HCLGE_STATE_DOWN, &hdev->state); spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); + hclge_clear_arfs_rules(hdev); spin_unlock_bh(&hdev->fd_rule_lock);
/* If it is not PF reset, the firmware will disable the MAC, @@@ -7440,8 -7978,7 +7977,7 @@@ return; }
- for (i = 0; i < handle->kinfo.num_tqps; i++) - hclge_reset_tqp(handle, i); + hclge_reset_tqp(handle);
hclge_config_mac_tnl_int(hdev, false);
@@@ -7891,7 -8428,7 +8427,7 @@@ int hclge_update_mac_list(struct hclge_
/* if the mac addr is already in the mac list, no need to add a new * one into it, just check the mac addr state, convert it to a new - * new state, or just remove it, or do nothing. + * state, or just remove it, or do nothing. */ mac_node = hclge_find_mac_node(list, addr); if (mac_node) { @@@ -8080,7 -8617,6 +8616,6 @@@ int hclge_add_mc_addr_common(struct hcl if (status) return status; status = hclge_add_mac_vlan_tbl(vport, &req, desc); - /* if already overflow, not to print each time */ if (status == -ENOSPC && !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE)) @@@ -8129,7 -8665,6 +8664,6 @@@ int hclge_rm_mc_addr_common(struct hclg else /* Not all the vfid is zero, update the vfid */ status = hclge_add_mac_vlan_tbl(vport, &req, desc); - } else if (status == -ENOENT) { status = 0; } @@@ -8564,7 -9099,7 +9098,7 @@@ static bool hclge_check_vf_mac_exist(st return true;
vf_idx += HCLGE_VF_VPORT_START_NUM; - for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) if (i != vf_idx && ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) return true; @@@ -8758,6 -9293,29 +9292,29 @@@ static int hclge_set_mac_addr(struct hn return 0; }
+ static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) + { + struct mii_ioctl_data *data = if_mii(ifr); + + if (!hnae3_dev_phy_imp_supported(hdev)) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = hdev->hw.mac.phy_addr; + /* this command reads phy id and register at the same time */ + fallthrough; + case SIOCGMIIREG: + data->val_out = hclge_read_phy_reg(hdev, data->reg_num); + return 0; + + case SIOCSMIIREG: + return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); + default: + return -EOPNOTSUPP; + } + } + static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr, int cmd) { @@@ -8765,7 -9323,7 +9322,7 @@@ struct hclge_dev *hdev = vport->back;
if (!hdev->hw.mac.phydev) - return -EOPNOTSUPP; + return hclge_mii_ioctl(hdev, ifr, cmd);
return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd); } @@@ -8922,8 -9480,7 +9479,7 @@@ static int hclge_check_vf_vlan_cmd_stat }
static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, - bool is_kill, u16 vlan, - __be16 proto) + bool is_kill, u16 vlan) { struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_desc desc[2]; @@@ -8989,8 -9546,7 +9545,7 @@@ static int hclge_set_vlan_filter_hw(str if (is_kill && !vlan_id) return 0;
- ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id, - proto); + ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id); if (ret) { dev_err(&hdev->pdev->dev, "Set %u vport vlan filter config fail, ret =%d.\n", @@@ -9440,7 -9996,7 +9995,7 @@@ static void hclge_restore_hw_table(stru hclge_restore_mac_table_common(vport); hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); - + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); }
@@@ -9796,7 -10352,7 +10351,7 @@@ out return ret; }
- static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, + static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id, bool enable) { struct hclge_reset_tqp_queue_cmd *req; @@@ -9852,94 -10408,114 +10407,114 @@@ u16 hclge_covert_handle_qid_global(stru return tqp->index; }
- int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) + static int hclge_reset_tqp_cmd(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; + u16 reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + u16 i;
- queue_gid = hclge_covert_handle_qid_global(handle, queue_id); - - ret = hclge_tqp_enable(hdev, queue_id, 0, false); - if (ret) { - dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret); - return ret; - } + for (i = 0; i < handle->kinfo.num_tqps; i++) { + queue_gid = hclge_covert_handle_qid_global(handle, i); + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to send reset tqp cmd, ret = %d\n", + ret); + return ret; + }
- ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); - if (ret) { - dev_err(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return ret; - } + while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { + reset_status = hclge_get_reset_status(hdev, queue_gid); + if (reset_status) + break;
- while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + /* Wait for tqp hw reset */ + usleep_range(1000, 1200); + }
- /* Wait for tqp hw reset */ - usleep_range(1000, 1200); - } + if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { + dev_err(&hdev->pdev->dev, + "wait for tqp hw reset timeout\n"); + return -ETIME; + }
- if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_err(&hdev->pdev->dev, "Reset TQP fail\n"); - return ret; + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to deassert soft reset, ret = %d\n", + ret); + return ret; + } + reset_try_times = 0; } - - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_err(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); - - return ret; + return 0; }
- void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) + static int hclge_reset_rcb(struct hnae3_handle *handle) { - struct hnae3_handle *handle = &vport->nic; + #define HCLGE_RESET_RCB_NOT_SUPPORT 0U + #define HCLGE_RESET_RCB_SUCCESS 1U + + struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; - int reset_status; + struct hclge_reset_cmd *req; + struct hclge_desc desc; + u8 return_status; u16 queue_gid; int ret;
- if (queue_id >= handle->kinfo.num_tqps) { - dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", - queue_id); - return; - } + queue_gid = hclge_covert_handle_qid_global(handle, 0);
- queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); + req = (struct hclge_reset_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false); + hnae3_set_bit(req->fun_reset_rcb, HCLGE_CFG_RESET_RCB_B, 1); + req->fun_reset_rcb_vqid_start = cpu_to_le16(queue_gid); + req->fun_reset_rcb_vqid_num = cpu_to_le16(handle->kinfo.num_tqps);
- ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { - dev_warn(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return; + dev_err(&hdev->pdev->dev, + "failed to send rcb reset cmd, ret = %d\n", ret); + return ret; }
- while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + return_status = req->fun_reset_rcb_return_status; + if (return_status == HCLGE_RESET_RCB_SUCCESS) + return 0;
- /* Wait for tqp hw reset */ - usleep_range(1000, 1200); + if (return_status != HCLGE_RESET_RCB_NOT_SUPPORT) { + dev_err(&hdev->pdev->dev, "failed to reset rcb, ret = %u\n", + return_status); + return -EIO; }
- if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_warn(&hdev->pdev->dev, "Reset TQP fail\n"); - return; + /* if reset rcb cmd is unsupported, we need to send reset tqp cmd + * again to reset all tqps + */ + return hclge_reset_tqp_cmd(handle); + } + + int hclge_reset_tqp(struct hnae3_handle *handle) + { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + /* only need to disable PF's tqp */ + if (!vport->vport_id) { + ret = hclge_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to disable tqp, ret = %d\n", ret); + return ret; + } }
- ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_warn(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); + return hclge_reset_rcb(handle); }
static u32 hclge_get_fw_version(struct hnae3_handle *handle) @@@ -10012,9 -10588,10 +10587,10 @@@ static void hclge_get_pauseparam(struc { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct phy_device *phydev = hdev->hw.mac.phydev; + u8 media_type = hdev->hw.mac.media_type;
- *auto_neg = phydev ? hclge_get_autoneg(handle) : 0; + *auto_neg = (media_type == HNAE3_MEDIA_TYPE_COPPER) ? + hclge_get_autoneg(handle) : 0;
if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { *rx_en = 0; @@@ -10060,7 -10637,7 +10636,7 @@@ static int hclge_set_pauseparam(struct struct phy_device *phydev = hdev->hw.mac.phydev; u32 fc_autoneg;
- if (phydev) { + if (phydev || hnae3_dev_phy_imp_supported(hdev)) { fc_autoneg = hclge_get_autoneg(handle); if (auto_neg != fc_autoneg) { dev_info(&hdev->pdev->dev, @@@ -10079,7 -10656,7 +10655,7 @@@
hclge_record_user_pauseparam(hdev, rx_en, tx_en);
- if (!auto_neg) + if (!auto_neg || hnae3_dev_phy_imp_supported(hdev)) return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
if (phydev) @@@ -10181,7 -10758,6 +10757,6 @@@ static void hclge_info_show(struct hclg dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc); dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc); dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport); - dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport); dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs); dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map); dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size); @@@ -10296,39 -10872,35 +10871,35 @@@ static int hclge_init_client_instance(s struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i, ret; - - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; + struct hclge_vport *vport = &hdev->vport[0]; + int ret;
- switch (client->type) { - case HNAE3_CLIENT_KNIC: - hdev->nic_client = client; - vport->nic.client = client; - ret = hclge_init_nic_client_instance(ae_dev, vport); - if (ret) - goto clear_nic; + switch (client->type) { + case HNAE3_CLIENT_KNIC: + hdev->nic_client = client; + vport->nic.client = client; + ret = hclge_init_nic_client_instance(ae_dev, vport); + if (ret) + goto clear_nic;
- ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce;
- break; - case HNAE3_CLIENT_ROCE: - if (hnae3_dev_roce_supported(hdev)) { - hdev->roce_client = client; - vport->roce.client = client; - } + break; + case HNAE3_CLIENT_ROCE: + if (hnae3_dev_roce_supported(hdev)) { + hdev->roce_client = client; + vport->roce.client = client; + }
- ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce;
- break; - default: - return -EINVAL; - } + break; + default: + return -EINVAL; }
return 0; @@@ -10347,32 -10919,27 +10918,27 @@@ static void hclge_uninit_client_instanc struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i; + struct hclge_vport *vport = &hdev->vport[0];
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; - if (hdev->roce_client) { - clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - hdev->roce_client->ops->uninit_instance(&vport->roce, - 0); - hdev->roce_client = NULL; - vport->roce.client = NULL; - } - if (client->type == HNAE3_CLIENT_ROCE) - return; - if (hdev->nic_client && client->ops->uninit_instance) { - clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - client->ops->uninit_instance(&vport->nic, 0); - hdev->nic_client = NULL; - vport->nic.client = NULL; - } + if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + hdev->roce_client = NULL; + vport->roce.client = NULL; + } + if (client->type == HNAE3_CLIENT_ROCE) + return; + if (hdev->nic_client && client->ops->uninit_instance) { + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + client->ops->uninit_instance(&vport->nic, 0); + hdev->nic_client = NULL; + vport->nic.client = NULL; } }
@@@ -10637,7 -11204,8 +11203,8 @@@ static int hclge_init_ae_dev(struct hna if (ret) goto err_msi_irq_uninit;
- if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER && + !hnae3_dev_phy_imp_supported(hdev)) { ret = hclge_mac_mdio_config(hdev); if (ret) goto err_msi_irq_uninit; @@@ -11030,6 -11598,13 +11597,13 @@@ static int hclge_reset_ae_dev(struct hn return ret; }
+ ret = hclge_tp_port_init(hdev); + if (ret) { + dev_err(&pdev->dev, "failed to init tp port, ret = %d\n", + ret); + return ret; + } + ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX); if (ret) { dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret); @@@ -11120,6 -11695,7 +11694,7 @@@ static void hclge_uninit_ae_dev(struct hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); hclge_uninit_mac_table(hdev); + hclge_del_all_fd_entries(hdev);
if (mac->phydev) mdiobus_unregister(mac->mdio_bus); @@@ -11379,7 -11955,6 +11954,6 @@@ static int hclge_get_64_bit_regs(struc #define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) #define REG_SEPARATOR_LINE 1 #define REG_NUM_REMAIN_MASK 3 - #define BD_LIST_MAX_NUM 30
int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) { @@@ -11473,15 -12048,19 +12047,19 @@@ static int hclge_get_dfx_reg_len(struc { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int data_len_per_desc, bd_num, i; - int bd_num_list[BD_LIST_MAX_NUM]; + int *bd_num_list; u32 data_len; int ret;
+ bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; }
data_len_per_desc = sizeof_field(struct hclge_desc, data); @@@ -11492,6 -12071,8 +12070,8 @@@ *len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE; }
+ out: + kfree(bd_num_list); return ret; }
@@@ -11499,16 -12080,20 +12079,20 @@@ static int hclge_get_dfx_reg(struct hcl { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int bd_num, bd_num_max, buf_len, i; - int bd_num_list[BD_LIST_MAX_NUM]; struct hclge_desc *desc_src; + int *bd_num_list; u32 *reg = data; int ret;
+ bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; }
bd_num_max = bd_num_list[0]; @@@ -11517,8 -12102,10 +12101,10 @@@
buf_len = sizeof(*desc_src) * bd_num_max; desc_src = kzalloc(buf_len, GFP_KERNEL); - if (!desc_src) - return -ENOMEM; + if (!desc_src) { + ret = -ENOMEM; + goto out; + }
for (i = 0; i < dfx_reg_type_num; i++) { bd_num = bd_num_list[i]; @@@ -11534,6 -12121,8 +12120,8 @@@ }
kfree(desc_src); + out: + kfree(bd_num_list); return ret; }
@@@ -11943,7 -12532,6 +12531,6 @@@ static const struct hnae3_ae_ops hclge_ .get_link_mode = hclge_get_link_mode, .add_fd_entry = hclge_add_fd_entry, .del_fd_entry = hclge_del_fd_entry, - .del_all_fd_entries = hclge_del_all_fd_entries, .get_fd_rule_cnt = hclge_get_fd_rule_cnt, .get_fd_rule_info = hclge_get_fd_rule_info, .get_fd_all_rules = hclge_get_all_rules, @@@ -11971,6 -12559,8 +12558,8 @@@ .add_cls_flower = hclge_add_cls_flower, .del_cls_flower = hclge_del_cls_flower, .cls_flower_active = hclge_is_cls_flower_active, + .get_phy_link_ksettings = hclge_get_phy_link_ksettings, + .set_phy_link_ksettings = hclge_set_phy_link_ksettings, };
static struct hnae3_ae_algo ae_algo = { diff --combined drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 14b83eca0a5e,1682769112d0..9838cc75e4c8 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@@ -497,7 -497,6 +497,6 @@@ void hclgevf_update_link_status(struct
link_state = test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state; - if (link_state != hdev->hw.mac.link) { client->ops->link_status_change(handle, !!link_state); if (rclient && rclient->ops->link_status_change) @@@ -707,6 -706,9 +706,9 @@@ static int hclgevf_set_rss_tc_mode(stru (tc_valid[i] & 0x1)); hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M, HCLGEVF_RSS_TC_SIZE_S, tc_size[i]); + hnae3_set_bit(mode, HCLGEVF_RSS_TC_SIZE_MSB_B, + tc_size[i] >> HCLGEVF_RSS_TC_SIZE_MSB_OFFSET & + 0x1); hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M, HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
@@@ -1241,12 -1243,11 +1243,11 @@@ static void hclgevf_sync_promisc_mode(s } }
- static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) + static int hclgevf_tqp_enable_cmd_send(struct hclgevf_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclgevf_cfg_com_tqp_queue_cmd *req; struct hclgevf_desc desc; - int status;
req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data;
@@@ -1257,12 -1258,22 +1258,22 @@@ if (enable) req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;
- status = hclgevf_cmd_send(&hdev->hw, &desc, 1); - if (status) - dev_err(&hdev->pdev->dev, - "TQP enable fail, status =%d.\n", status); + return hclgevf_cmd_send(&hdev->hw, &desc, 1); + }
- return status; + static int hclgevf_tqp_enable(struct hnae3_handle *handle, bool enable) + { + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclgevf_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + + return 0; }
static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) @@@ -1711,20 -1722,39 +1722,39 @@@ static int hclgevf_en_hw_strip_rxvtag(s return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); }
- static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) + static int hclgevf_reset_tqp(struct hnae3_handle *handle) { + #define HCLGEVF_RESET_ALL_QUEUE_DONE 1U struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; + u8 return_status = 0; int ret; + u16 i;
/* disable vf queue before send queue reset msg to PF */ - ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); - if (ret) + ret = hclgevf_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, "failed to disable tqp, ret = %d\n", + ret); return ret; + }
hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); - memcpy(send_msg.data, &queue_id, sizeof(queue_id)); - return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &return_status, + sizeof(return_status)); + if (ret || return_status == HCLGEVF_RESET_ALL_QUEUE_DONE) + return ret; + + for (i = 1; i < handle->kinfo.num_tqps; i++) { + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); + memcpy(send_msg.data, &i, sizeof(i)); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + if (ret) + return ret; + } + + return 0; }
static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) @@@ -2356,7 -2386,6 +2386,6 @@@ static enum hclgevf_evt_cause hclgevf_c /* fetch the events from their corresponding regs */ cmdq_stat_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG); - if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) { rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); dev_info(&hdev->pdev->dev, @@@ -2624,28 -2653,25 +2653,25 @@@ static int hclgevf_ae_start(struct hnae { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); + hclgevf_reset_tqp_stats(handle);
hclgevf_request_link_info(hdev);
hclgevf_update_link_mode(hdev);
- clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); - return 0; }
static void hclgevf_ae_stop(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - int i;
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
if (hdev->reset_type != HNAE3_VF_RESET) - for (i = 0; i < handle->kinfo.num_tqps; i++) - if (hclgevf_reset_tqp(handle, i)) - break; + hclgevf_reset_tqp(handle);
hclgevf_reset_tqp_stats(handle); hclgevf_update_link_status(hdev, 0); diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 96d5202a73e8,c4c167650b6b..54103139e156 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@@ -212,7 -212,7 +212,7 @@@ static void __i40e_add_stat_strings(u8 }
/** - * 40e_add_stat_strings - copy stat strings into ethtool buffer + * i40e_add_stat_strings - copy stat strings into ethtool buffer * @p: ethtool supplied buffer * @stats: stat definitions array * @@@ -232,8 -232,6 +232,8 @@@ I40E_STAT(struct i40e_vsi, _name, _stat) #define I40E_VEB_STAT(_name, _stat) \ I40E_STAT(struct i40e_veb, _name, _stat) +#define I40E_VEB_TC_STAT(_name, _stat) \ + I40E_STAT(struct i40e_cp_veb_tc_stats, _name, _stat) #define I40E_PFC_STAT(_name, _stat) \ I40E_STAT(struct i40e_pfc_stats, _name, _stat) #define I40E_QUEUE_STAT(_name, _stat) \ @@@ -268,18 -266,11 +268,18 @@@ static const struct i40e_stats i40e_gst I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol), };
+struct i40e_cp_veb_tc_stats { + u64 tc_rx_packets; + u64 tc_rx_bytes; + u64 tc_tx_packets; + u64 tc_tx_bytes; +}; + static const struct i40e_stats i40e_gstrings_veb_tc_stats[] = { - I40E_VEB_STAT("veb.tc_%u_tx_packets", tc_stats.tc_tx_packets), - I40E_VEB_STAT("veb.tc_%u_tx_bytes", tc_stats.tc_tx_bytes), - I40E_VEB_STAT("veb.tc_%u_rx_packets", tc_stats.tc_rx_packets), - I40E_VEB_STAT("veb.tc_%u_rx_bytes", tc_stats.tc_rx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_tx_packets", tc_tx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_tx_bytes", tc_tx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_rx_packets", tc_rx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_rx_bytes", tc_rx_bytes), };
static const struct i40e_stats i40e_gstrings_misc_stats[] = { @@@ -1110,7 -1101,6 +1110,7 @@@ static int i40e_get_link_ksettings(stru
/* Set flow control settings */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause);
switch (hw->fc.requested_mode) { case I40E_FC_FULL: @@@ -2226,29 -2216,6 +2226,29 @@@ static int i40e_get_sset_count(struct n } }
+/** + * i40e_get_veb_tc_stats - copy VEB TC statistics to formatted structure + * @tc: the TC statistics in VEB structure (veb->tc_stats) + * @i: the index of traffic class in (veb->tc_stats) structure to copy + * + * Copy VEB TC statistics from structure of arrays (veb->tc_stats) to + * one dimensional structure i40e_cp_veb_tc_stats. + * Produce formatted i40e_cp_veb_tc_stats structure of the VEB TC + * statistics for the given TC. + **/ +static struct i40e_cp_veb_tc_stats +i40e_get_veb_tc_stats(struct i40e_veb_tc_stats *tc, unsigned int i) +{ + struct i40e_cp_veb_tc_stats veb_tc = { + .tc_rx_packets = tc->tc_rx_packets[i], + .tc_rx_bytes = tc->tc_rx_bytes[i], + .tc_tx_packets = tc->tc_tx_packets[i], + .tc_tx_bytes = tc->tc_tx_bytes[i], + }; + + return veb_tc; +} + /** * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure * @pf: the PF device structure @@@ -2333,16 -2300,8 +2333,16 @@@ static void i40e_get_ethtool_stats(stru i40e_gstrings_veb_stats);
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) - i40e_add_ethtool_stats(&data, veb_stats ? veb : NULL, - i40e_gstrings_veb_tc_stats); + if (veb_stats) { + struct i40e_cp_veb_tc_stats veb_tc = + i40e_get_veb_tc_stats(&veb->tc_stats, i); + + i40e_add_ethtool_stats(&data, &veb_tc, + i40e_gstrings_veb_tc_stats); + } else { + i40e_add_ethtool_stats(&data, NULL, + i40e_gstrings_veb_tc_stats); + }
i40e_add_ethtool_stats(&data, pf, i40e_gstrings_stats);
@@@ -2409,21 -2368,15 +2409,15 @@@ static void i40e_get_priv_flag_strings( struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - char *p = (char *)data; unsigned int i; + u8 *p = data;
- for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - i40e_gstrings_priv_flags[i].flag_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) + ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string); if (pf->hw.pf_id != 0) return; - for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - i40e_gl_gstrings_priv_flags[i].flag_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) + ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string); }
static void i40e_get_strings(struct net_device *netdev, u32 stringset, diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c index af6c25fa493c,1555d6009bf5..113a92fb1ef2 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@@ -2023,7 -2023,7 +2023,7 @@@ static void i40e_undo_add_filter_entrie }
/** - * i40e_next_entry - Get the next non-broadcast filter from a list + * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we @@@ -5204,7 -5204,7 +5204,7 @@@ static u8 i40e_pf_get_num_tc(struct i40 }
/** - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. @@@ -6738,9 -6738,9 +6738,9 @@@ out set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } - /* registers are set, lets apply */ - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) - ret = i40e_hw_set_dcb_config(pf, new_cfg); + /* registers are set, lets apply */ + if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + ret = i40e_hw_set_dcb_config(pf, new_cfg); }
err: @@@ -7339,7 -7339,7 +7339,7 @@@ static void i40e_vsi_set_default_tc_con qcount = min_t(int, vsi->alloc_queue_pairs, i40e_pf_get_max_q_per_tc(vsi->back)); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - /* For the TC that is not enabled set the offset to to default + /* For the TC that is not enabled set the offset to default * queue and allocate one queue for the given TC. */ vsi->tc_config.tc_info[i].qoffset = 0; @@@ -9467,7 -9467,7 +9467,7 @@@ static void i40e_fdir_flush_and_replay( }
/** - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) @@@ -10573,6 -10573,12 +10573,6 @@@ static void i40e_rebuild(struct i40e_p goto end_core_reset; }
- if (!lock_acquired) - rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); - if (ret) - goto end_unlock; - #ifdef CONFIG_I40E_DCB /* Enable FW to write a default DCB config on link-up * unless I40E_FLAG_TC_MQPRIO was enabled or DCB @@@ -10587,7 -10593,7 +10587,7 @@@ i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); @@@ -10601,11 -10607,6 +10601,11 @@@ }
#endif /* CONFIG_I40E_DCB */ + if (!lock_acquired) + rtnl_lock(); + ret = i40e_setup_pf_switch(pf, reinit); + if (ret) + goto end_unlock;
/* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. @@@ -10624,7 -10625,7 +10624,7 @@@ * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try - * try to recover minimal use by getting the basic PF VSI working. + * to recover minimal use by getting the basic PF VSI working. */ if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); diff --combined drivers/net/ethernet/intel/i40e/i40e_xsk.c index 12ca84113587,d89c22347d9d..46d884417c63 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@@ -160,6 -160,13 +160,13 @@@ static int i40e_run_xdp_zc(struct i40e_ xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp);
+ if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@@ -167,10 -174,6 +174,6 @@@ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@@ -471,7 -474,7 +474,7 @@@ static bool i40e_xmit_zc(struct i40e_ri
nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); if (!nb_pkts) - return false; + return true;
if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; @@@ -488,7 -491,7 +491,7 @@@
i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
- return true; + return nb_pkts < budget; }
/** @@@ -625,7 -628,7 +628,7 @@@ void i40e_xsk_clean_rx_ring(struct i40e }
/** - * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown + * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown * @tx_ring: XDP Tx ring **/ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) diff --combined drivers/net/ethernet/intel/ice/ice.h index 17101c45cbcd,02badaaf818c..9413ac4d3fb9 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@@ -73,7 -73,7 +73,7 @@@ #define ICE_MIN_LAN_TXRX_MSIX 1 #define ICE_MIN_LAN_OICR_MSIX 1 #define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) - #define ICE_FDIR_MSIX 1 + #define ICE_FDIR_MSIX 2 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@@ -84,9 -84,12 +84,12 @@@ #define ICE_MAX_LG_RSS_QS 256 #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) + /* All VF control VSIs share the same IRQ, so assign a unique ID for them */ + #define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_MISC_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_INVAL_VFID 256
+ #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ #define ICE_MAX_RESET_WAIT 20
#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@@ -190,12 -193,13 +193,12 @@@ struct ice_sw u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ };
- enum ice_state { + enum ice_pf_state { __ICE_TESTING, __ICE_DOWN, __ICE_NEEDS_RESTART, __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ - __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */ __ICE_PFR_REQ, /* set by driver and peers */ __ICE_CORER_REQ, /* set by driver and peers */ __ICE_GLOBR_REQ, /* set by driver and peers */ @@@ -228,15 -232,18 +231,18 @@@ __ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ __ICE_LINK_DEFAULT_OVERRIDE_PENDING, __ICE_PHY_INIT_COMPLETE, + __ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ __ICE_STATE_NBITS /* must be last */ };
- enum ice_vsi_flags { - ICE_VSI_FLAG_UMAC_FLTR_CHANGED, - ICE_VSI_FLAG_MMAC_FLTR_CHANGED, - ICE_VSI_FLAG_VLAN_FLTR_CHANGED, - ICE_VSI_FLAG_PROMISC_CHANGED, - ICE_VSI_FLAG_NBITS /* must be last */ + enum ice_vsi_state { + ICE_VSI_DOWN, + ICE_VSI_NEEDS_RESTART, + ICE_VSI_UMAC_FLTR_CHANGED, + ICE_VSI_MMAC_FLTR_CHANGED, + ICE_VSI_VLAN_FLTR_CHANGED, + ICE_VSI_PROMISC_CHANGED, + ICE_VSI_STATE_NBITS /* must be last */ };
/* struct that defines a VSI, associated with a dev */ @@@ -252,8 -259,7 +258,7 @@@ struct ice_vsi irqreturn_t (*irq_handler)(int irq, void *data);
u64 tx_linearize; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); - DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); + DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS); unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; @@@ -499,7 -505,7 +504,7 @@@ ice_irq_dynamic_ena(struct ice_hw *hw, val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr << GLINT_DYN_CTL_ITR_INDX_S); if (vsi) - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return; wr32(hw, GLINT_DYN_CTL(vector), val); } @@@ -616,14 -622,16 +621,16 @@@ int ice_destroy_xdp_rings(struct ice_vs int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); - int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); - int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); + int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); + int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); + int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed); + int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); const char *ice_stat_str(enum ice_status stat_err); const char *ice_aq_str(enum ice_aq_err aq_err); -bool ice_is_wol_supported(struct ice_pf *pf); +bool ice_is_wol_supported(struct ice_hw *hw); int ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add, bool is_tun); @@@ -641,7 -649,6 +648,7 @@@ int ice_fdir_create_dflt_rules(struct i int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, struct ice_rq_event_info *event); int ice_open(struct net_device *netdev); +int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf);
diff --combined drivers/net/ethernet/intel/ice/ice_common.c index a20edf1538a0,54df00ee912b..34fddbc30822 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@@ -717,8 -717,8 +717,8 @@@ static enum ice_status ice_cfg_fw_log(s
if (!data) { data = devm_kcalloc(ice_hw_to_dev(hw), - sizeof(*data), ICE_AQC_FW_LOG_ID_MAX, + sizeof(*data), GFP_KERNEL); if (!data) return ICE_ERR_NO_MEMORY; @@@ -3186,7 -3186,7 +3186,7 @@@ ice_aq_sff_eeprom(struct ice_hw *hw, u1
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom); cmd = &desc.params.read_write_sff_param; - desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF); + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); cmd->lport_num = (u8)(lport & 0xff); cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & @@@ -3206,23 -3206,33 +3206,33 @@@ /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_id: VSI FW index - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer - * @glob_lut_idx: global LUT index + * @params: RSS LUT parameters * @set: set true to set the table, false to get the table * * Internal function to get (0x0B05) or set (0x0B03) RSS look up table */ static enum ice_status - __ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, - u16 lut_size, u8 glob_lut_idx, bool set) + __ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set) { + u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle; struct ice_aqc_get_set_rss_lut *cmd_resp; struct ice_aq_desc desc; enum ice_status status; - u16 flags = 0; + u8 *lut; + + if (!params) + return ICE_ERR_PARAM; + + vsi_handle = params->vsi_handle; + lut = params->lut; + + if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) + return ICE_ERR_PARAM; + + lut_size = params->lut_size; + lut_type = params->lut_type; + glob_lut_idx = params->global_lut_id; + vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
cmd_resp = &desc.params.get_set_rss_lut;
@@@ -3296,43 -3306,27 +3306,27 @@@ ice_aq_get_set_rss_lut_exit /** * ice_aq_get_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @get_params: RSS LUT parameters used to specify which RSS LUT to get * * get the RSS lookup table, PF or VSI type */ enum ice_status - ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) + ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, false); + return __ice_aq_get_set_rss_lut(hw, get_params, false); }
/** * ice_aq_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @set_params: RSS LUT parameters used to specify how to set the RSS LUT * * set the RSS lookup table, PF or VSI type */ enum ice_status - ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) + ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, true); + return __ice_aq_get_set_rss_lut(hw, set_params, true); }
/** @@@ -4373,7 -4367,7 +4367,7 @@@ ice_aq_set_lldp_mib(struct ice_hw *hw, }
/** - * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl + * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl * @hw: pointer to HW struct */ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) diff --combined drivers/net/ethernet/intel/ice/ice_controlq.h index 68866f4f0eb0,7d0905f25ddc..77c2307d4fb8 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@@ -14,8 -14,8 +14,8 @@@ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
#define ICE_CTL_Q_DESC_UNUSED(R) \ - (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) + ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1))
/* Defines that help manage the driver vs FW API checks. * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. @@@ -31,8 -31,8 +31,8 @@@ enum ice_ctl_q ICE_CTL_Q_MAILBOX, };
-/* Control Queue timeout settings - max delay 250ms */ -#define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ +/* Control Queue timeout settings - max delay 1s */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 10000 /* Count 10000 times */ #define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ diff --combined drivers/net/ethernet/intel/ice/ice_dcb.c index 211ac6f907ad,85c9eccfdae8..43c6af42de8a --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@@ -738,27 -738,22 +738,27 @@@ ice_aq_get_cee_dcb_cfg(struct ice_hw *h /** * ice_cee_to_dcb_cfg * @cee_cfg: pointer to CEE configuration struct - * @dcbcfg: DCB configuration struct + * @pi: port information structure * * Convert CEE configuration from firmware to DCB configuration */ static void ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, - struct ice_dcbx_cfg *dcbcfg) + struct ice_port_info *pi) { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift; + u8 i, j, err, sync, oper, app_index, ice_app_sel_type; u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); - u8 i, err, sync, oper, app_index, ice_app_sel_type; u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift; + struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg; u16 ice_app_prot_id_type;
- /* CEE PG data to ETS config */ + dcbcfg = &pi->qos_cfg.local_dcbx_cfg; + dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE; + dcbcfg->tlv_status = tlv_status; + + /* CEE PG data */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
/* Note that the FW creates the oper_prio_tc nibbles reversed @@@ -785,16 -780,10 +785,16 @@@ } }
- /* CEE PFC data to ETS config */ + /* CEE PFC data */ dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en; dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+ /* CEE APP TLV data */ + if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING) + cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg; + else + cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg; + app_index = 0; for (i = 0; i < 3; i++) { if (i == 0) { @@@ -813,18 -802,6 +813,18 @@@ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S; ice_app_sel_type = ICE_APP_SEL_TCPIP; ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI; + + for (j = 0; j < cmp_dcbcfg->numapps; j++) { + u16 prot_id = cmp_dcbcfg->app[j].prot_id; + u8 sel = cmp_dcbcfg->app[j].selector; + + if (sel == ICE_APP_SEL_TCPIP && + (prot_id == ICE_APP_PROT_ID_ISCSI || + prot_id == ICE_APP_PROT_ID_ISCSI_860)) { + ice_app_prot_id_type = prot_id; + break; + } + } } else { /* FIP APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M; @@@ -857,7 -834,7 +857,7 @@@ }
/** - * ice_get_ieee_dcb_cfg + * ice_get_ieee_or_cee_dcb_cfg * @pi: port information structure * @dcbx_mode: mode of DCBX (IEEE or CEE) * @@@ -915,8 -892,11 +915,8 @@@ enum ice_status ice_get_dcb_cfg(struct ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL); if (!ret) { /* CEE mode */ - dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; - dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE; - dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status); - ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg); ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE); + ice_cee_to_dcb_cfg(&cee_cfg, pi); } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { /* CEE mode not enabled try querying IEEE data */ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; diff --combined drivers/net/ethernet/intel/ice/ice_ethtool.c index 32ba71a16165,15152e63f204..9efbf4561b8c --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@@ -871,68 -871,47 +871,47 @@@ static void ice_get_strings(struct net_ { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - char *p = (char *)data; unsigned int i; + u8 *p = data;
switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < ICE_VSI_STATS_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_vsi_stats[i].stat_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_VSI_STATS_LEN; i++) + ethtool_sprintf(&p, + ice_gstrings_vsi_stats[i].stat_string);
ice_for_each_alloc_txq(vsi, i) { - snprintf(p, ETH_GSTRING_LEN, - "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); }
ice_for_each_alloc_rxq(vsi, i) { - snprintf(p, ETH_GSTRING_LEN, - "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); }
if (vsi->type != ICE_VSI_PF) return;
- for (i = 0; i < ICE_PF_STATS_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_pf_stats[i].stat_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_PF_STATS_LEN; i++) + ethtool_sprintf(&p, + ice_gstrings_pf_stats[i].stat_string);
for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - snprintf(p, ETH_GSTRING_LEN, - "tx_priority_%u_xon.nic", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, - "tx_priority_%u_xoff.nic", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i); + ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i); } for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - snprintf(p, ETH_GSTRING_LEN, - "rx_priority_%u_xon.nic", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, - "rx_priority_%u_xoff.nic", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i); + ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i); } break; case ETH_SS_TEST: memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN); break; case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_priv_flags[i].name); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) + ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name); break; default: break; @@@ -2907,7 -2886,7 +2886,7 @@@ process_link /* Bring interface down, copy in the new ring info, then restore the * interface. if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ice_down(vsi);
if (tx_rings) { @@@ -3161,7 -3140,7 +3140,7 @@@ ice_get_rxfh(struct net_device *netdev struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int ret = 0, i; + int err, i; u8 *lut;
if (hfunc) @@@ -3180,17 -3159,20 +3159,20 @@@ if (!lut) return -ENOMEM;
- if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) { - ret = -EIO; + err = ice_get_rss_key(vsi, key); + if (err) + goto out; + + err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) goto out; - }
for (i = 0; i < vsi->rss_table_size; i++) indir[i] = (u32)(lut[i]);
out: kfree(lut); - return ret; + return err; }
/** @@@ -3211,7 -3193,7 +3193,7 @@@ ice_set_rxfh(struct net_device *netdev struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct device *dev; - u8 *seed = NULL; + int err;
dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) @@@ -3232,7 -3214,10 +3214,10 @@@ return -ENOMEM; } memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE); - seed = vsi->rss_hkey_user; + + err = ice_set_rss_key(vsi, vsi->rss_hkey_user); + if (err) + return err; }
if (!vsi->rss_lut_user) { @@@ -3253,8 -3238,9 +3238,9 @@@ vsi->rss_size); }
- if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size)) - return -EIO; + err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size); + if (err) + return err;
return 0; } @@@ -3350,10 -3336,9 +3336,9 @@@ static int ice_get_valid_rss_size(struc static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) { struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; struct ice_hw *hw; - int err = 0; + int err; u8 *lut;
dev = ice_pf_to_dev(pf); @@@ -3374,14 -3359,10 +3359,10 @@@
/* create/set RSS LUT */ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) + dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err, ice_aq_str(hw->adminq.sq_last_status)); - err = -EIO; - }
kfree(lut); return err; @@@ -3472,7 -3453,7 +3453,7 @@@ static void ice_get_wol(struct net_devi netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
/* Get WoL settings based on the HW capability */ - if (ice_is_wol_supported(pf)) { + if (ice_is_wol_supported(&pf->hw)) { wol->supported = WAKE_MAGIC; wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0; } else { @@@ -3492,7 -3473,7 +3473,7 @@@ static int ice_set_wol(struct net_devic struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back;
- if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(pf)) + if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw)) return -EOPNOTSUPP;
/* only magic packet is supported */ @@@ -3947,14 -3928,14 +3928,14 @@@ ice_get_module_eeprom(struct net_devic u8 value = 0; u8 page = 0;
- status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, - &value, 1, 0, NULL); - if (status) - return -EIO; - if (!ee || !ee->len || !data) return -EINVAL;
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0, + NULL); + if (status) + return -EIO; + if (value == ICE_MODULE_TYPE_SFP) is_sfp = true;
diff --combined drivers/net/ethernet/intel/ice/ice_lib.c index d13c7fc8fb0a,6041ca2830de..4778374d4fbe --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@@ -343,6 -343,9 +343,9 @@@ static int ice_vsi_clear(struct ice_vs pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL) pf->next_vsi = vsi->idx; + if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && + vsi->vf_id != ICE_INVAL_VFID) + pf->next_vsi = vsi->idx;
ice_vsi_free_arrays(vsi); mutex_unlock(&pf->sw_mutex); @@@ -419,7 -422,7 +422,7 @@@ ice_vsi_alloc(struct ice_pf *pf, enum i
vsi->type = vsi_type; vsi->back = pf; - set_bit(__ICE_DOWN, vsi->state); + set_bit(ICE_VSI_DOWN, vsi->state);
if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); @@@ -454,8 -457,8 +457,8 @@@ goto unlock_pf; }
- if (vsi->type == ICE_VSI_CTRL) { - /* Use the last VSI slot as the index for the control VSI */ + if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) { + /* Use the last VSI slot as the index for PF control VSI */ vsi->idx = pf->num_alloc_vsi - 1; pf->ctrl_vsi_idx = vsi->idx; pf->vsi[vsi->idx] = vsi; @@@ -468,6 -471,9 +471,9 @@@ pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); } + + if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID) + pf->vf[vf_id].ctrl_vsi_idx = vsi->idx; goto unlock_pf;
err_rings: @@@ -506,7 -512,7 +512,7 @@@ static int ice_alloc_fd_res(struct ice_ if (!b_val) return -EPERM;
- if (vsi->type != ICE_VSI_PF) + if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF)) return -EPERM;
if (!test_bit(ICE_FLAG_FD_ENA, pf->flags)) @@@ -517,6 -523,13 +523,13 @@@ /* each VSI gets same "best_effort" quota */ vsi->num_bfltr = b_val;
+ if (vsi->type == ICE_VSI_VF) { + vsi->num_gfltr = 0; + + /* each VSI gets same "best_effort" quota */ + vsi->num_bfltr = b_val; + } + return 0; }
@@@ -729,11 -742,10 +742,10 @@@ static void ice_set_dflt_vsi_ctx(struc */ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, tx_count = 0; + u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; + u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - u16 tx_numq_tc, rx_numq_tc; - u16 pow = 0, max_rss = 0; bool ena_tc0 = false; u8 netdev_tc = 0; int i; @@@ -751,12 -763,15 +763,15 @@@ vsi->tc_cfg.ena_tc |= 1; }
- rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc; - if (!rx_numq_tc) - rx_numq_tc = 1; - tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc; - if (!tx_numq_tc) - tx_numq_tc = 1; + num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); + if (!num_rxq_per_tc) + num_rxq_per_tc = 1; + num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc; + if (!num_txq_per_tc) + num_txq_per_tc = 1; + + /* find the (rounded up) power-of-2 of qcount */ + pow = (u16)order_base_2(num_rxq_per_tc);
/* TC mapping is a function of the number of Rx queues assigned to the * VSI for each traffic class and the offset of these queues. @@@ -769,26 -784,6 +784,6 @@@ * * Setup number and offset of Rx queues for all TCs for the VSI */ - - qcount_rx = rx_numq_tc; - - /* qcount will change if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { - if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) { - if (vsi->type == ICE_VSI_PF) - max_rss = ICE_MAX_LG_RSS_QS; - else - max_rss = ICE_MAX_RSS_QS_PER_VF; - qcount_rx = min_t(u16, rx_numq_tc, max_rss); - if (!vsi->req_rxq) - qcount_rx = min_t(u16, qcount_rx, - vsi->rss_size); - } - } - - /* find the (rounded up) power-of-2 of qcount */ - pow = (u16)order_base_2(qcount_rx); - ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { /* TC is not enabled */ @@@ -802,16 -797,16 +797,16 @@@
/* TC is enabled */ vsi->tc_cfg.tc_info[i].qoffset = offset; - vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; - vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc; + vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc; + vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc; vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & ICE_AQ_VSI_TC_Q_OFFSET_M) | ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); - offset += qcount_rx; - tx_count += tx_numq_tc; + offset += num_rxq_per_tc; + tx_count += num_txq_per_tc; ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); }
@@@ -824,7 -819,7 +819,7 @@@ if (offset) vsi->num_rxq = offset; else - vsi->num_rxq = qcount_rx; + vsi->num_rxq = num_rxq_per_tc;
vsi->num_txq = tx_count;
@@@ -856,7 -851,8 +851,8 @@@ static void ice_set_fd_vsi_ctx(struct i u8 dflt_q_group, dflt_q_prio; u16 dflt_q, report_q, val;
- if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL) + if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL && + vsi->type != ICE_VSI_VF) return;
val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID; @@@ -1179,7 -1175,24 +1175,24 @@@ static int ice_vsi_setup_vector_base(st
num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { + struct ice_vf *vf; + int i; + + ice_for_each_vf(pf, i) { + vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { + base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; + break; + } + } + if (i == pf->num_alloc_vfs) + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + ICE_RES_VF_CTRL_VEC_ID); + } else { + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + vsi->idx); + }
if (base < 0) { dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n", @@@ -1313,7 -1326,7 +1326,7 @@@ int ice_vsi_manage_rss_lut(struct ice_v vsi->rss_size); }
- err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size); + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); kfree(lut); return err; } @@@ -1324,12 -1337,10 +1337,10 @@@ */ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { - struct ice_aqc_get_set_rss_keys *key; struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; - int err = 0; - u8 *lut; + u8 *lut, *key; + int err;
dev = ice_pf_to_dev(pf); vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); @@@ -1343,37 -1354,26 +1354,26 @@@ else ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
- status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - - if (status) { - dev_err(dev, "set_rss_lut failed, error %s\n", - ice_stat_str(status)); - err = -EIO; + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) { + dev_err(dev, "set_rss_lut failed, error %d\n", err); goto ice_vsi_cfg_rss_exit; }
- key = kzalloc(sizeof(*key), GFP_KERNEL); + key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL); if (!key) { err = -ENOMEM; goto ice_vsi_cfg_rss_exit; }
if (vsi->rss_hkey_user) - memcpy(key, - (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); else - netdev_rss_key_fill((void *)key, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
- status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); - - if (status) { - dev_err(dev, "set_rss_key failed, error %s\n", - ice_stat_str(status)); - err = -EIO; - } + err = ice_set_rss_key(vsi, key); + if (err) + dev_err(dev, "set_rss_key failed, error %d\n", err);
kfree(key); ice_vsi_cfg_rss_exit: @@@ -2308,7 -2308,7 +2308,7 @@@ ice_vsi_setup(struct ice_pf *pf, struc struct ice_vsi *vsi; int ret, i;
- if (vsi_type == ICE_VSI_VF) + if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) vsi = ice_vsi_alloc(pf, vsi_type, vf_id); else vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID); @@@ -2323,7 -2323,7 +2323,7 @@@ if (vsi->type == ICE_VSI_PF) vsi->ethtype = ETH_P_PAUSE;
- if (vsi->type == ICE_VSI_VF) + if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL) vsi->vf_id = vf_id;
ice_alloc_fd_res(vsi); @@@ -2593,7 -2593,7 +2593,7 @@@ void ice_vsi_free_rx_rings(struct ice_v */ void ice_vsi_close(struct ice_vsi *vsi) { - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi);
ice_vsi_free_irq(vsi); @@@ -2610,17 -2610,17 +2610,17 @@@ int ice_ena_vsi(struct ice_vsi *vsi, bo { int err = 0;
- if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) + if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state)) return 0;
- clear_bit(__ICE_NEEDS_RESTART, vsi->state); + clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
if (vsi->netdev && vsi->type == ICE_VSI_PF) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock();
- err = ice_open(vsi->netdev); + err = ice_open_internal(vsi->netdev);
if (!locked) rtnl_unlock(); @@@ -2639,17 -2639,17 +2639,17 @@@ */ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) { - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return;
- set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
if (vsi->type == ICE_VSI_PF && vsi->netdev) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock();
- ice_stop(vsi->netdev); + ice_vsi_close(vsi);
if (!locked) rtnl_unlock(); @@@ -2770,7 -2770,24 +2770,24 @@@ int ice_vsi_release(struct ice_vsi *vsi * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type != ICE_VSI_VF) { + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { + struct ice_vf *vf; + int i; + + ice_for_each_vf(pf, i) { + vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) + break; + } + if (i == pf->num_alloc_vfs) { + /* No other VFs left that have control VSI, reclaim SW + * interrupts back to the common pool + */ + ice_free_res(pf->irq_tracker, vsi->base_vector, + ICE_RES_VF_CTRL_VEC_ID); + pf->num_avail_sw_msix += vsi->num_q_vectors; + } + } else if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; @@@ -2795,7 -2812,7 +2812,7 @@@ ice_vsi_free_q_vectors(vsi);
/* make sure unregister_netdev() was called by checking __ICE_DOWN */ - if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) { + if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) { free_netdev(vsi->netdev); vsi->netdev = NULL; } @@@ -2818,38 -2835,46 +2835,46 @@@ }
/** - * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector + * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector * @q_vector: pointer to q_vector which is being updated - * @coalesce: pointer to array of struct with stored coalesce + * @stored_intrl_setting: original INTRL setting * * Set coalesce param in q_vector and update these parameters in HW. */ static void - ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, - struct ice_coalesce_stored *coalesce) + ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector, + u16 stored_intrl_setting) { - struct ice_ring_container *rx_rc = &q_vector->rx; - struct ice_ring_container *tx_rc = &q_vector->tx; struct ice_hw *hw = &q_vector->vsi->back->hw;
- tx_rc->itr_setting = coalesce->itr_tx; - rx_rc->itr_setting = coalesce->itr_rx; - - /* dynamic ITR values will be updated during Tx/Rx */ - if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) - wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(tx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) - wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - - q_vector->intrl = coalesce->intrl; + q_vector->intrl = stored_intrl_setting; wr32(hw, GLINT_RATE(q_vector->reg_idx), ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); }
+ /** + * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector + * @q_vector: pointer to q_vector which is being updated + * @rc: pointer to ring container + * @stored_itr_setting: original ITR setting + * + * Set coalesce param in q_vector and update these parameters in HW. + */ + static void + ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + u16 stored_itr_setting) + { + struct ice_hw *hw = &q_vector->vsi->back->hw; + + rc->itr_setting = stored_itr_setting; + + /* dynamic ITR values will be updated during Tx/Rx */ + if (!ITR_IS_DYNAMIC(rc->itr_setting)) + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S); + } + /** * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors * @vsi: VSI connected with q_vectors @@@ -2869,6 -2894,11 +2894,11 @@@ ice_vsi_rebuild_get_coalesce(struct ice coalesce[i].itr_tx = q_vector->tx.itr_setting; coalesce[i].itr_rx = q_vector->rx.itr_setting; coalesce[i].intrl = q_vector->intrl; + + if (i < vsi->num_txq) + coalesce[i].tx_valid = true; + if (i < vsi->num_rxq) + coalesce[i].rx_valid = true; }
return vsi->num_q_vectors; @@@ -2893,17 -2923,59 +2923,59 @@@ ice_vsi_rebuild_set_coalesce(struct ice if ((size && !coalesce) || !vsi) return;
- for (i = 0; i < size && i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[i]); - - /* number of q_vectors increased, so assume coalesce settings were - * changed globally (i.e. ethtool -C eth0 instead of per-queue) and use - * the previous settings from q_vector 0 for all of the new q_vectors + /* There are a couple of cases that have to be handled here: + * 1. The case where the number of queue vectors stays the same, but + * the number of Tx or Rx rings changes (the first for loop) + * 2. The case where the number of queue vectors increased (the + * second for loop) + */ + for (i = 0; i < size && i < vsi->num_q_vectors; i++) { + /* There are 2 cases to handle here and they are the same for + * both Tx and Rx: + * if the entry was valid previously (coalesce[i].[tr]x_valid + * and the loop variable is less than the number of rings + * allocated, then write the previous values + * + * if the entry was not valid previously, but the number of + * rings is less than are allocated (this means the number of + * rings increased from previously), then write out the + * values in the first element + */ + if (i < vsi->alloc_rxq && coalesce[i].rx_valid) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[i].itr_rx); + else if (i < vsi->alloc_rxq) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[0].itr_rx); + + if (i < vsi->alloc_txq && coalesce[i].tx_valid) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[i].itr_tx); + else if (i < vsi->alloc_txq) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[0].itr_tx); + + ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], + coalesce[i].intrl); + } + + /* the number of queue vectors increased so write whatever is in + * the first element */ - for (; i < vsi->num_q_vectors; i++) - 
ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[0]); + for (; i < vsi->num_q_vectors; i++) { + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[0].itr_tx); + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[0].itr_rx); + ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], + coalesce[0].intrl); + } }
/** @@@ -2932,9 -3004,11 +3004,11 @@@ int ice_vsi_rebuild(struct ice_vsi *vsi
coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (coalesce) - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, - coalesce); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi);
@@@ -3078,6 -3152,7 +3152,6 @@@ err_vsi bool ice_is_reset_in_progress(unsigned long *state) { return test_bit(__ICE_RESET_OICR_RECV, state) || - test_bit(__ICE_DCBNL_DEVRESET, state) || test_bit(__ICE_PFR_REQ, state) || test_bit(__ICE_CORER_REQ, state) || test_bit(__ICE_GLOBR_REQ, state); diff --combined drivers/net/ethernet/intel/ice/ice_main.c index 9f1adff85be7,b3c1cadecf21..0fbaea838e97 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@@ -84,7 -84,7 +84,7 @@@ static void ice_check_for_hang_subtask( break; }
- if (!vsi || test_bit(__ICE_DOWN, vsi->state)) + if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state)) return;
if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) @@@ -140,21 -140,10 +140,10 @@@ static int ice_init_mac_fltr(struct ice
perm_addr = vsi->port_info->mac.perm_addr; status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI); - if (!status) - return 0; - - /* We aren't useful with no MAC filters, so unregister if we - * had an error - */ - if (vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n", - ice_stat_str(status)); - unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } + if (status) + return -EIO;
- return -EIO; + return 0; }
/** @@@ -209,9 -198,9 +198,9 @@@ static int ice_add_mac_to_unsync_list(s */ static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) { - return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); }
/** @@@ -278,9 -267,9 +267,9 @@@ static int ice_vsi_sync_fltr(struct ice INIT_LIST_HEAD(&vsi->tmp_unsync_list);
if (ice_vsi_fltr_changed(vsi)) { - clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
/* grab the netdev's addr_list_lock */ netif_addr_lock_bh(netdev); @@@ -361,8 -350,8 +350,8 @@@ }
if (((changed_flags & IFF_PROMISC) || promisc_forced_on) || - test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) { - clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) { + clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { @@@ -395,12 -384,12 +384,12 @@@ goto exit;
out_promisc: - set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); goto exit; out: /* if something went wrong then set the changed flag so we try again */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); exit: clear_bit(__ICE_CFG_BUSY, vsi->state); return err; @@@ -609,7 -598,7 +598,7 @@@ static void ice_print_topo_conflict(str case ICE_AQ_LINK_TOPO_UNREACH_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA: - netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); + netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n"); break; case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); @@@ -764,7 -753,7 +753,7 @@@ static void ice_vsi_link_event(struct i if (!vsi) return;
- if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev) return;
if (vsi->type == ICE_VSI_PF) { @@@ -1044,7 -1033,7 +1033,7 @@@ struct ice_aq_task };
/** - * ice_wait_for_aq_event - Wait for an AdminQ event from firmware + * ice_aq_wait_for_event - Wait for an AdminQ event from firmware * @pf: pointer to the PF private structure * @opcode: the opcode to wait for * @timeout: how long to wait, in jiffies @@@ -2020,7 -2009,7 +2009,7 @@@ static void ice_check_media_subtask(str /* PHY settings are reset on media insertion, reconfigure * PHY to preserve settings. */ - if (test_bit(__ICE_DOWN, vsi->state) && + if (test_bit(ICE_VSI_DOWN, vsi->state) && test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) return;
@@@ -2071,6 -2060,7 +2060,7 @@@ static void ice_service_task(struct wor ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); ice_sync_arfs_fltrs(pf); + ice_flush_fdir_ctx(pf); /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ ice_service_task_complete(pf);
@@@ -2082,6 -2072,7 +2072,7 @@@ test_bit(__ICE_MDD_EVENT_PENDING, pf->state) || test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) || + test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) || test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@@ -2220,8 -2211,13 +2211,13 @@@ static int ice_vsi_req_irq_msix(struct /* skip this unused q_vector */ continue; } - err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0, - q_vector->name, q_vector); + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) + err = devm_request_irq(dev, irq_num, vsi->irq_handler, + IRQF_SHARED, q_vector->name, + q_vector); + else + err = devm_request_irq(dev, irq_num, vsi->irq_handler, + 0, q_vector->name, q_vector); if (err) { netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", err); @@@ -2524,7 -2520,7 +2520,7 @@@ ice_xdp_setup_prog(struct ice_vsi *vsi }
/* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); @@@ -2975,18 -2971,11 +2971,11 @@@ static int ice_cfg_netdev(struct ice_vs struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - int err; - - err = ice_devlink_create_port(vsi); - if (err) - return err;
netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, vsi->alloc_rxq); - if (!netdev) { - err = -ENOMEM; - goto err_destroy_devlink_port; - } + if (!netdev) + return -ENOMEM;
vsi->netdev = netdev; np = netdev_priv(netdev); @@@ -3014,25 -3003,7 +3003,7 @@@ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU;
- err = register_netdev(vsi->netdev); - if (err) - goto err_free_netdev; - - devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); - - netif_carrier_off(vsi->netdev); - - /* make sure transmit queues start off as stopped */ - netif_tx_stop_all_queues(vsi->netdev); - return 0; - - err_free_netdev: - free_netdev(vsi->netdev); - vsi->netdev = NULL; - err_destroy_devlink_port: - ice_devlink_destroy_port(vsi); - return err; }
/** @@@ -3132,7 -3103,7 +3103,7 @@@ ice_vlan_rx_add_vid(struct net_device * */ ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); if (!ret) - set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
return ret; } @@@ -3171,7 -3142,7 +3142,7 @@@ ice_vlan_rx_kill_vid(struct net_device if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) ret = ice_cfg_vlan_pruning(vsi, false, false);
- set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; }
@@@ -3230,8 -3201,6 +3201,6 @@@ unroll_napi_add if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { - if (vsi->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } @@@ -3537,14 -3506,15 +3506,14 @@@ static int ice_init_interrupt_scheme(st }
/** - * ice_is_wol_supported - get NVM state of WoL - * @pf: board private structure + * ice_is_wol_supported - check if WoL is supported + * @hw: pointer to hardware info * * Check if WoL is supported based on the HW configuration. * Returns true if NVM supports and enables WoL for this port, false otherwise */ -bool ice_is_wol_supported(struct ice_pf *pf) +bool ice_is_wol_supported(struct ice_hw *hw) { - struct ice_hw *hw = &pf->hw; u16 wol_ctrl;
/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control @@@ -3553,7 -3523,7 +3522,7 @@@ if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl)) return false;
- return !(BIT(hw->pf_id) & wol_ctrl); + return !(BIT(hw->port_info->lport) & wol_ctrl); }
/** @@@ -3984,6 -3954,40 +3953,40 @@@ static void ice_print_wake_reason(struc dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str); }
+ /** + * ice_register_netdev - register netdev and devlink port + * @pf: pointer to the PF struct + */ + static int ice_register_netdev(struct ice_pf *pf) + { + struct ice_vsi *vsi; + int err = 0; + + vsi = ice_get_main_vsi(pf); + if (!vsi || !vsi->netdev) + return -EIO; + + err = register_netdev(vsi->netdev); + if (err) + goto err_register_netdev; + + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + err = ice_devlink_create_port(vsi); + if (err) + goto err_devlink_create; + + devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); + + return 0; + err_devlink_create: + unregister_netdev(vsi->netdev); + err_register_netdev: + free_netdev(vsi->netdev); + vsi->netdev = NULL; + return err; + } + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@@ -4191,25 -4195,28 +4194,25 @@@ ice_probe(struct pci_dev *pdev, const s goto err_send_version_unroll; }
+ /* not a fatal error if this fails */ err = ice_init_nvm_phy_type(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err); - goto err_send_version_unroll; - }
+ /* not a fatal error if this fails */ err = ice_update_link_info(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_update_link_info failed: %d\n", err); - goto err_send_version_unroll; - }
ice_init_link_dflt_override(pf->hw.port_info);
/* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + /* not a fatal error if this fails */ err = ice_init_phy_user_cfg(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err); - goto err_send_version_unroll; - }
if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) { struct ice_vsi *vsi = ice_get_main_vsi(pf); @@@ -4261,10 -4268,16 +4264,16 @@@ pcie_print_link_status(pf->pdev);
probe_done: + err = ice_register_netdev(pf); + if (err) + goto err_netdev_reg; + /* ready to go, so clear down state bit */ clear_bit(__ICE_DOWN, pf->state); + return 0;
+ err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: @@@ -4310,7 -4323,7 +4319,7 @@@ static void ice_set_wake(struct ice_pf }
/** - * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet + * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet * @pf: pointer to the PF struct * * Issue firmware command to enable multicast magic wake, making @@@ -4960,8 -4973,8 +4969,8 @@@ static void ice_set_rx_mode(struct net_ * ndo_set_rx_mode may be triggered even without a change in netdev * flags */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
/* schedule our worker thread which will take care of @@@ -5234,7 -5247,7 +5243,7 @@@ static int ice_up_complete(struct ice_v if (err) return err;
- clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_napi_enable_all(vsi); ice_vsi_ena_irq(vsi);
@@@ -5377,7 -5390,7 +5386,7 @@@ void ice_update_vsi_stats(struct ice_vs struct ice_eth_stats *cur_es = &vsi->eth_stats; struct ice_pf *pf = vsi->back;
- if (test_bit(__ICE_DOWN, vsi->state) || + if (test_bit(ICE_VSI_DOWN, vsi->state) || test_bit(__ICE_CFG_BUSY, pf->state)) return;
@@@ -5582,7 -5595,7 +5591,7 @@@ void ice_get_stats64(struct net_device * But, only call the update routine and read the registers if VSI is * not down. */ - if (!test_bit(__ICE_DOWN, vsi->state)) + if (!test_bit(ICE_VSI_DOWN, vsi->state)) ice_update_vsi_ring_stats(vsi); stats->tx_packets = vsi_stats->tx_packets; stats->tx_bytes = vsi_stats->tx_bytes; @@@ -5782,7 -5795,7 +5791,7 @@@ int ice_vsi_open_ctrl(struct ice_vsi *v if (err) goto err_up_complete;
- clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_vsi_ena_irq(vsi);
return 0; @@@ -6169,7 -6182,7 +6178,7 @@@ static int ice_change_mtu(struct net_de netdev->mtu = (unsigned int)new_mtu;
/* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { int err;
err = ice_down(vsi); @@@ -6304,89 -6317,118 +6313,118 @@@ const char *ice_stat_str(enum ice_statu }
/** - * ice_set_rss - Set RSS keys and lut + * ice_set_rss_lut - Set RSS LUT * @vsi: Pointer to VSI structure - * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure */ - int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) + int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev;
- dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL;
- status = ice_aq_set_rss_key(hw, vsi->idx, buf); + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut;
- if (status) { - dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + status = ice_aq_set_rss_lut(hw, ¶ms); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
- if (lut) { - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; + } + + /** + * ice_set_rss_key - Set RSS key + * @vsi: Pointer to the VSI structure + * @seed: RSS hash seed + * + * Returns 0 on success, negative on failure + */ + int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed) + { + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
return 0; }
/** - * ice_get_rss - Get RSS keys and lut + * ice_get_rss_lut - Get RSS LUT * @vsi: Pointer to VSI structure - * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ - int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) + int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev;
- dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL;
- status = ice_aq_get_rss_key(hw, vsi->idx, buf); - if (status) { - dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut; + + status = ice_aq_get_rss_lut(hw, ¶ms); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
- if (lut) { - status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; + } + + /** + * ice_get_rss_key - Get RSS key + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the key in + * + * Returns 0 on success, negative on failure + */ + int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) + { + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
return 0; @@@ -6609,7 -6651,7 +6647,7 @@@ static void ice_tx_timeout(struct net_d default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); set_bit(__ICE_DOWN, pf->state); - set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); set_bit(__ICE_SERVICE_DIS, pf->state); break; } @@@ -6631,28 -6673,6 +6669,28 @@@ * Returns 0 on success, negative value on failure */ int ice_open(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't open net device while reset is in progress"); + return -EBUSY; + } + + return ice_open_internal(netdev); +} + +/** + * ice_open_internal - Called when a network interface becomes active + * @netdev: network interface device structure + * + * Internal ice_open implementation. Should not be used directly except for ice_open and reset + * handling routine + * + * Returns 0 on success, negative value on failure + */ +int ice_open_internal(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@@ -6665,11 -6685,6 +6703,6 @@@ return -EIO; }
- if (test_bit(__ICE_DOWN, pf->state)) { - netdev_err(netdev, "device is not ready yet\n"); - return -EBUSY; - } - netif_carrier_off(netdev);
pi = vsi->port_info; @@@ -6733,12 -6748,6 +6766,12 @@@ int ice_stop(struct net_device *netdev { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't stop net device while reset is in progress"); + return -EBUSY; + }
ice_vsi_close(vsi);
diff --combined drivers/net/ethernet/intel/ice/ice_switch.c index 834cbd3f7b31,5e5683a3eb23..357d3073d814 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@@ -920,7 -920,7 +920,7 @@@ ice_create_vsi_list_map(struct ice_hw * struct ice_vsi_list_map_info *v_map; int i;
- v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL); + v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL); if (!v_map) return NULL;
@@@ -1238,9 -1238,6 +1238,9 @@@ ice_add_update_vsi_list(struct ice_hw * ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id);
+ if (!m_entry->vsi_list_info) + return ICE_ERR_NO_MEMORY; + /* If this entry was large action then the large action needs * to be updated to point to FWD to VSI list */ @@@ -2223,7 -2220,6 +2223,7 @@@ ice_vsi_uses_fltr(struct ice_fltr_mgmt_ return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI && fm_entry->fltr_info.vsi_handle == vsi_handle) || (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST && + fm_entry->vsi_list_info && (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map)))); }
@@@ -2296,12 -2292,14 +2296,12 @@@ ice_add_to_vsi_fltr_list(struct ice_hw return ICE_ERR_PARAM;
list_for_each_entry(fm_entry, lkup_list_head, list_entry) { - struct ice_fltr_info *fi; - - fi = &fm_entry->fltr_info; - if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle)) + if (!ice_vsi_uses_fltr(fm_entry, vsi_handle)) continue;
status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, - vsi_list_head, fi); + vsi_list_head, + &fm_entry->fltr_info); if (status) return status; } @@@ -2624,7 -2622,7 +2624,7 @@@ ice_remove_vsi_lkup_fltr(struct ice_hw &remove_list_head); mutex_unlock(rule_lock); if (status) - return; + goto free_fltr_list;
switch (lkup) { case ICE_SW_LKUP_MAC: @@@ -2647,7 -2645,6 +2647,7 @@@ break; }
+free_fltr_list: list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { list_del(&fm_entry->list_entry); devm_kfree(ice_hw_to_dev(hw), fm_entry); diff --combined drivers/net/ethernet/intel/ice/ice_type.h index 266036b7a49a,276ebcc309dc..21727fe39702 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@@ -192,6 -192,24 +192,24 @@@ enum ice_fltr_ptype ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_SCTP, ICE_FLTR_PTYPE_NONF_IPV4_OTHER, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER, + ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER, + ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3, + ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3, + ICE_FLTR_PTYPE_NONF_IPV4_ESP, + ICE_FLTR_PTYPE_NONF_IPV6_ESP, + ICE_FLTR_PTYPE_NONF_IPV4_AH, + ICE_FLTR_PTYPE_NONF_IPV6_AH, + ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP, + ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP, + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE, + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION, + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE, + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION, + ICE_FLTR_PTYPE_NON_IP_L2, ICE_FLTR_PTYPE_FRAG_IPV4, ICE_FLTR_PTYPE_NONF_IPV6_UDP, ICE_FLTR_PTYPE_NONF_IPV6_TCP, @@@ -535,7 -553,6 +553,7 @@@ struct ice_dcb_app_priority_table #define ICE_TLV_STATUS_ERR 0x4 #define ICE_APP_PROT_ID_FCOE 0x8906 #define ICE_APP_PROT_ID_ISCSI 0x0cbc +#define ICE_APP_PROT_ID_ISCSI_860 0x035c #define ICE_APP_PROT_ID_FIP 0x8914 #define ICE_APP_SEL_ETHTYPE 0x1 #define ICE_APP_SEL_TCPIP 0x2 @@@ -703,13 -720,13 +721,13 @@@ struct ice_hw
enum ice_aq_err pkg_dwnld_status;
- /* Driver's package ver - (from the Metadata seg) */ + /* Driver's package ver - (from the Ice Metadata section) */ struct ice_pkg_ver pkg_ver; u8 pkg_name[ICE_PKG_NAME_SIZE];
- /* Driver's Ice package version (from the Ice seg) */ - struct ice_pkg_ver ice_pkg_ver; - u8 ice_pkg_name[ICE_PKG_NAME_SIZE]; + /* Driver's Ice segment format version and ID (from the Ice seg) */ + struct ice_pkg_ver ice_seg_fmt_ver; + u8 ice_seg_id[ICE_SEG_ID_SIZE];
/* Pointer to the ice segment */ struct ice_seg *seg; @@@ -810,6 -827,14 +828,14 @@@ struct ice_hw_port_stats u64 fd_sb_match; };
+ struct ice_aq_get_set_rss_lut_params { + u16 vsi_handle; /* software VSI handle */ + u16 lut_size; /* size of the LUT buffer */ + u8 lut_type; /* type of the LUT (i.e. VSI, PF, Global) */ + u8 *lut; /* input RSS LUT for set and output RSS LUT for get */ + u8 global_lut_id; /* only valid when lut_type is global */ + }; + /* Checksum and Shadow RAM pointers */ #define ICE_SR_NVM_CTRL_WORD 0x00 #define ICE_SR_BOOT_CFG_PTR 0x132 diff --combined drivers/net/ethernet/mellanox/mlx5/core/dev.c index 9153c9bda96f,4def64d0e669..a9166cd85013 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@@ -58,9 -58,6 +58,6 @@@ static bool is_eth_supported(struct mlx if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false;
- if (is_eth_rep_supported(dev)) - return false; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return false;
@@@ -191,12 -188,12 +188,12 @@@ static bool is_ib_supported(struct mlx5 }
enum { - MLX5_INTERFACE_PROTOCOL_ETH_REP, MLX5_INTERFACE_PROTOCOL_ETH, + MLX5_INTERFACE_PROTOCOL_ETH_REP,
+ MLX5_INTERFACE_PROTOCOL_IB, MLX5_INTERFACE_PROTOCOL_IB_REP, MLX5_INTERFACE_PROTOCOL_MPIB, - MLX5_INTERFACE_PROTOCOL_IB,
MLX5_INTERFACE_PROTOCOL_VNET, }; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index bc6f77ea0a31,b425b4a539bf..e1c51eabe8fe --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -269,6 -269,7 +269,7 @@@ struct mlx5e_params struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; + bool ptp_rx; };
enum { @@@ -516,7 -517,6 +517,7 @@@ struct mlx5e_icosq struct mlx5_wq_cyc wq; void __iomem *uar_map; u32 sqn; + u16 reserved_room; unsigned long state;
/* control path */ @@@ -708,11 -708,11 +709,11 @@@ struct mlx5e_channel int cpu; };
- struct mlx5e_port_ptp; + struct mlx5e_ptp;
struct mlx5e_channels { struct mlx5e_channel **c; - struct mlx5e_port_ptp *port_ptp; + struct mlx5e_ptp *ptp; unsigned int num; struct mlx5e_params params; }; @@@ -727,10 -727,11 +728,11 @@@ struct mlx5e_channel_stats struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp;
- struct mlx5e_port_ptp_stats { + struct mlx5e_ptp_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; } ____cacheline_aligned_in_smp;
enum { @@@ -837,6 -838,7 +839,7 @@@ struct mlx5e_priv struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir ptp_tir; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS];
@@@ -856,10 -858,11 +859,11 @@@ struct mlx5e_stats stats; struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_channel_stats trap_stats; - struct mlx5e_port_ptp_stats port_ptp_stats; + struct mlx5e_ptp_stats ptp_stats; u16 max_nch; u8 max_opened_tc; - bool port_ptp_opened; + bool tx_ptp_opened; + bool rx_ptp_opened; struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; @@@ -882,7 -885,6 +886,6 @@@ #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; - struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; @@@ -916,13 -918,12 +919,12 @@@ struct mlx5e_profile const struct mlx5e_rx_handlers *rx_handlers; int max_tc; u8 rq_groups; + bool rx_ptp_support; };
void mlx5e_build_ptys2ethtool_map(void);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); - bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@@ -965,9 -966,9 +967,9 @@@ struct mlx5e_tirc_config mlx5e_tirc_get struct mlx5e_xsk_param;
struct mlx5e_rq_param; - int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq); + int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq); int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); @@@ -1022,18 -1023,11 +1024,11 @@@ int mlx5e_num_channels_changed(struct m int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); + int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels);
- void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); - void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); - void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); - void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); - - void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); - void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@@ -1092,10 -1086,10 +1087,10 @@@ int mlx5e_create_indirect_rqt(struct ml int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
- int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); - void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); - int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); - void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); + int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); + void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); + int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); + void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); @@@ -1176,10 -1170,9 +1171,9 @@@ void mlx5e_detach_netdev(struct mlx5e_p void mlx5e_destroy_netdev(struct mlx5e_priv *priv); int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, const struct mlx5e_profile *new_profile, void *new_ppriv); + void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); - void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 68e54cc1cd16,1c44000ad675..5da5e5323a44 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@@ -29,6 -29,8 +29,8 @@@ #define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_NAT_BIT BIT(3) #define MLX5_CT_STATE_REPLY_BIT BIT(4) + #define MLX5_CT_STATE_RELATED_BIT BIT(5) + #define MLX5_CT_STATE_INVALID_BIT BIT(6)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@@ -185,28 -187,6 +187,28 @@@ mlx5_tc_ct_entry_has_nat(struct mlx5_ct return !!(entry->tuple_nat_node.next); }
+static int +mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, + u32 *labels, u32 *id) +{ + if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { + *id = 0; + return 0; + } + + if (mapping_add(ct_priv->labels_mapping, labels, id)) + return -EOPNOTSUPP; + + return 0; +} + +static void +mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) +{ + if (id) + mapping_remove(ct_priv->labels_mapping, id); +} + static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { @@@ -458,7 -438,7 +460,7 @@@ mlx5_tc_ct_entry_del_rule(struct mlx5_t mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); kfree(attr); }
@@@ -661,8 -641,8 +663,8 @@@ mlx5_tc_ct_entry_create_mod_hdr(struct if (!meta) return -EOPNOTSUPP;
- err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, - &attr->ct_attr.ct_labels_id); + err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, + &attr->ct_attr.ct_labels_id); if (err) return -EOPNOTSUPP; if (nat) { @@@ -699,7 -679,7 +701,7 @@@
err_mapping: dealloc_mod_hdr_actions(&mod_acts); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); return err; }
@@@ -717,7 -697,7 +719,7 @@@ mlx5_tc_ct_entry_add_rule(struct mlx5_t
zone_rule->nat = nat;
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM;
@@@ -759,7 -739,7 +761,7 @@@
zone_rule->attr = attr;
- kfree(spec); + kvfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0; @@@ -767,11 -747,11 +769,11 @@@ err_rule: mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: kfree(attr); err_attr: - kfree(spec); + kvfree(spec); return err; }
@@@ -1219,7 -1199,7 +1221,7 @@@ void mlx5_tc_ct_match_del(struct mlx5_t if (!priv || !ct_attr->ct_labels_id) return;
- mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); + mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); }
int @@@ -1229,8 -1209,8 +1231,8 @@@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_ struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack) { + bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; struct flow_rule *rule = flow_cls_offload_flow_rule(f); - bool trk, est, untrk, unest, new, rpl, unrpl; struct flow_dissector_key_ct *mask, *key; u32 ctstate = 0, ctstate_mask = 0; u16 ct_state_on, ct_state_off; @@@ -1258,7 -1238,9 +1260,9 @@@ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | TCA_FLOWER_KEY_CT_FLAGS_NEW | - TCA_FLOWER_KEY_CT_FLAGS_REPLY)) { + TCA_FLOWER_KEY_CT_FLAGS_REPLY | + TCA_FLOWER_KEY_CT_FLAGS_RELATED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { NL_SET_ERR_MSG_MOD(extack, "only ct_state trk, est, new and rpl are supported for offload"); return -EOPNOTSUPP; @@@ -1270,9 -1252,13 +1274,13 @@@ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; @@@ -1280,6 -1266,20 +1288,20 @@@ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; + ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0; + + if (rel) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +rel isn't supported"); + return -EOPNOTSUPP; + } + + if (inv) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +inv isn't supported"); + return -EOPNOTSUPP; + }
if (new) { NL_SET_ERR_MSG_MOD(extack, @@@ -1302,7 -1302,7 +1324,7 @@@ ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; - if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) + if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) return -EOPNOTSUPP; mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, MLX5_CT_LABELS_MASK); @@@ -1562,6 -1562,14 +1584,14 @@@ mlx5_tc_ct_free_pre_ct_tables(struct ml mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); }
+ /* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ + static struct lock_class_key ct_entries_ht_lock_key; + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@@ -1596,6 -1604,8 +1626,8 @@@ if (err) goto err_init;
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, zone_params); if (err) @@@ -1697,10 -1707,10 +1729,10 @@@ __mlx5_tc_ct_flow_offload(struct mlx5_t struct mlx5_ct_ft *ft; u32 fte_id = 1;
- post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); if (!post_ct_spec || !ct_flow) { - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@@ -1810,6 -1820,10 +1842,10 @@@ ct_flow->post_ct_attr->prio = 0; ct_flow->post_ct_attr->ft = ct_priv->post_ct;
+ /* Splits were handled before CT */ + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + ct_flow->post_ct_attr->esw_attr->split_count = 0; + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); @@@ -1835,7 -1849,7 +1871,7 @@@
attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kfree(post_ct_spec); + kvfree(post_ct_spec);
return rule;
@@@ -1856,7 -1870,7 +1892,7 @@@ err_alloc_pre err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index e1271998b937,89d5ca91566e..9350ca05ce65 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@@ -21,11 -21,6 +21,11 @@@ enum MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, };
+struct mlx5e_encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + struct mlx5e_tc_tunnel { int tunnel_type; enum mlx5_flow_match_level match_level; @@@ -49,8 -44,6 +49,8 @@@ struct flow_cls_offload *f, void *headers_c, void *headers_v); + bool (*encap_info_equal)(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); };
extern struct mlx5e_tc_tunnel vxlan_tunnel; @@@ -83,10 -76,12 +83,12 @@@ int mlx5e_tc_tun_update_header_ipv6(str static inline int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } - int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e) + { return -EOPNOTSUPP; } + static inline int + mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } #endif int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, @@@ -108,9 -103,6 +110,9 @@@ int mlx5e_tc_tun_parse_udp_ports(struc void *headers_c, void *headers_v);
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + #endif /* CONFIG_MLX5_ESWITCH */
#endif //__MLX5_EN_TC_TUNNEL_H__ diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 9f16ad2c0710,01d435e15ad3..593503bc4d07 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@@ -2,6 -2,7 +2,7 @@@ /* Copyright (c) 2021 Mellanox Technologies. */
#include <net/fib_notifier.h> + #include <net/nexthop.h> #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@@ -476,11 -477,16 +477,11 @@@ void mlx5e_detach_decap(struct mlx5e_pr mlx5e_decap_dealloc(priv, d); }
-struct encap_key { - const struct ip_tunnel_key *ip_tun_key; - struct mlx5e_tc_tunnel *tc_tunnel; -}; - -static int cmp_encap_info(struct encap_key *a, - struct encap_key *b) +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) { - return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && + a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; }
static int cmp_decap_info(struct mlx5e_decap_key *a, @@@ -489,7 -495,7 +490,7 @@@ return memcmp(&a->key, &b->key, sizeof(b->key)); }
-static int hash_encap_info(struct encap_key *key) +static int hash_encap_info(struct mlx5e_encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); @@@ -511,18 -517,18 +512,18 @@@ static bool mlx5e_decap_take(struct mlx }
static struct mlx5e_encap_entry * -mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, +mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, uintptr_t hash_key) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_key e_key; struct mlx5e_encap_entry *e; - struct encap_key e_key;
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { e_key.ip_tun_key = &e->tun_info->key; e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, key) && + if (e->tunnel->encap_info_equal(&e_key, key) && mlx5e_encap_take(e)) return e; } @@@ -689,8 -695,8 +690,8 @@@ int mlx5e_attach_encap(struct mlx5e_pri struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; unsigned long tbl_time_before = 0; - struct encap_key key; struct mlx5e_encap_entry *e; + struct mlx5e_encap_key key; bool entry_created = false; unsigned short family; uintptr_t hash_key; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 19d22a63313f,f7c880edae37..8c0f78c09215 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@@ -46,8 -46,7 +46,8 @@@ struct mlx5e_ktls_offload_context_rx struct tls12_crypto_info_aes_gcm_128 crypto_info; struct accel_rule rule; struct sock *sk; - struct mlx5e_rq_stats *stats; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; u32 tirn; u32 key_id; @@@ -85,7 -84,7 +85,7 @@@ static int mlx5e_ktls_create_tir(struc
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); MLX5_SET(tirc, tirc, indirect_table, rqtn); @@@ -138,10 -137,11 +138,10 @@@ post_static_params(struct mlx5e_icosq * { struct mlx5e_set_tls_static_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs;
num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC);
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@@ -168,10 -168,11 +168,10 @@@ post_progress_params(struct mlx5e_icos { struct mlx5e_set_tls_progress_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs;
num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC);
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@@ -217,7 -218,7 +217,7 @@@ unlock return err;
err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; err = PTR_ERR(cseg); complete(&priv_rx->add_ctx); goto unlock; @@@ -276,15 -277,17 +276,15 @@@ resync_post_get_progress_params(struct
buf->priv_rx = priv_rx;
- BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); - spin_lock_bh(&sq->channel->async_icosq_lock);
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) { spin_unlock_bh(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; }
- pi = mlx5e_icosq_get_next_pi(sq, 1); + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS); wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi);
#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)) @@@ -304,7 -307,7 +304,7 @@@
wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS, - .num_wqebbs = 1, + .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS, .tls_get_params.buf = buf, }; icosq_fill_wi(sq, pi, &wi); @@@ -319,7 -322,7 +319,7 @@@ err_dma_unmap err_free: kfree(buf); err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; return err; }
@@@ -375,13 -378,13 +375,13 @@@ static int resync_handle_seq_match(stru
cseg = post_static_params(sq, priv_rx); if (IS_ERR(cseg)) { - priv_rx->stats->tls_resync_res_skip++; + priv_rx->rq_stats->tls_resync_res_skip++; err = PTR_ERR(cseg); goto unlock; } /* Do not increment priv_rx refcnt, CQE handling is empty */ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); - priv_rx->stats->tls_resync_res_ok++; + priv_rx->rq_stats->tls_resync_res_ok++; unlock: spin_unlock_bh(&c->async_icosq_lock);
@@@ -417,13 -420,13 +417,13 @@@ void mlx5e_ktls_handle_get_psv_completi auth_state = MLX5_GET(tls_progress_params, ctx, auth_state); if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; goto out; }
hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); - priv_rx->stats->tls_resync_req_end++; + priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@@ -606,8 -609,7 +606,8 @@@ int mlx5e_ktls_add_rx(struct net_devic priv_rx->rxq = rxq; priv_rx->sk = sk;
- priv_rx->stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
rqtn = priv->direct_tir[rxq].rqt.rqtn; @@@ -628,7 -630,7 +628,7 @@@ if (err) goto err_post_wqes;
- priv_rx->stats->tls_ctx++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx);
return 0;
@@@ -664,7 -666,7 +664,7 @@@ void mlx5e_ktls_del_rx(struct net_devic if (cancel_work_sync(&resync->work)) mlx5e_ktls_priv_rx_put(priv_rx);
- priv_rx->stats->tls_del++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_del); if (priv_rx->rule.rule) mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 53802e18af90,964558086ad6..b185a0452629 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -34,6 -34,7 +34,7 @@@ #include "en/port.h" #include "en/params.h" #include "en/xsk/pool.h" + #include "en/ptp.h" #include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@@ -368,7 -369,7 +369,7 @@@ int mlx5e_ethtool_set_ringparam(struct new_channels.params.log_rq_mtu_frames = log_rq_size; new_channels.params.log_sq_size = log_sq_size;
- err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_channels.params); if (err) goto unlock;
@@@ -758,11 -759,11 +759,11 @@@ static int get_fec_supported_advertised return 0; }
-static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, - u32 eth_proto_cap, - u8 connector_type, bool ext) +static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, + struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, u8 connector_type) { - if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) @@@ -898,9 -899,9 +899,9 @@@ static int ptys2connector_type[MLX5E_CO [MLX5E_PORT_OTHER] = PORT_OTHER, };
-static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext) +static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type) { - if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) return ptys2connector_type[connector_type];
if (eth_proto & @@@ -1001,11 -1002,11 +1002,11 @@@ int mlx5e_ethtool_get_link_ksettings(st data_rate_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - - link_ksettings->base.port = get_connector_port(eth_proto_oper, - connector_type, ext); - ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, - connector_type, ext); + connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? + connector_type : MLX5E_PORT_UNKNOWN; + link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type); + ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin, + connector_type); get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
if (an_status == MLX5_AN_COMPLETE) @@@ -1865,13 -1866,19 +1866,19 @@@ int mlx5e_modify_rx_cqe_compression_loc
new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) + new_channels.params.ptp_rx = new_val;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; return 0; }
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + else + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, + &new_channels.params.ptp_rx); if (err) return err;
@@@ -1892,11 -1899,6 +1899,6 @@@ static int set_pflag_rx_cqe_compress(st if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP;
- if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { - netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); - return -EINVAL; - } - err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); if (err) return err; @@@ -2032,7 -2034,7 +2034,7 @@@ static int set_pflag_tx_port_ts(struct mlx5e_num_channels_changed_ctx, NULL); out: if (!err) - priv->port_ptp_opened = true; + priv->tx_ptp_opened = true;
return err; } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5db63b9f3b70,773449c1424b..2f47608bb9b9 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -87,51 -87,6 +87,6 @@@ bool mlx5e_check_fragmented_striding_rq return true; }
- void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) - { - params->log_rq_mtu_frames = is_kdump_kernel() ? - MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - - mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : - BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); - } - - bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) - { - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return false; - - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; - - if (params->xdp_prog) { - /* XSK params are not considered here. If striding RQ is in use, - * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will - * be called with the known XSK params. - */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return false; - } - - return true; - } - - void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) - { - params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_CYCLIC; - } - void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@@ -259,18 -214,17 +214,17 @@@ static inline void mlx5e_build_umr_wqe( ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); }
- static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, - struct mlx5e_channel *c) + static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, sizeof(*rq->mpwqe.info)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM;
- mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe); + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
return 0; } @@@ -302,7 -256,7 +256,7 @@@ static int mlx5e_create_umr_mkey(struc MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); @@@ -419,58 -373,53 +373,53 @@@ static void mlx5e_free_mpwqe_rq_drop_pa __free_page(rq->wqe_overflow.page); }
- static int mlx5e_alloc_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, + static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) + { + struct mlx5_core_dev *mdev = c->mdev; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, NULL); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); + } + + static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq_param *rqp, - struct mlx5e_rq *rq) + int node, struct mlx5e_rq *rq) { struct page_pool_params pp_params = { 0 }; - struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); - u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; int i;
- rqp->wq.db_numa_node = cpu_to_node(c->cpu); - - rq->wq_type = params->rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->priv = c->priv; - rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; - rq->icosq = &c->icosq; - rq->ix = c->ix; - rq->mdev = mdev; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->xdpsq = &c->rq_xdpsq; - rq->xsk_pool = xsk_pool; - rq->ptp_cyc2time = mlx5_is_real_time_rq(mdev) ? - mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; - - if (rq->xsk_pool) - rq->stats = &c->priv->channel_stats[c->ix].xskrq; - else - rq->stats = &c->priv->channel_stats[c->ix].rq; + rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
- rq_xdp_ix = rq->ix; - if (xsk) - rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); - if (err < 0) - goto err_rq_xdp_prog; - rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; @@@ -480,7 -429,7 +429,7 @@@ err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog;
err = mlx5e_alloc_mpwqe_rq_drop_page(rq); if (err) @@@ -504,7 -453,7 +453,7 @@@ goto err_rq_drop_page; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
- err = mlx5e_rq_alloc_mpwqe_info(rq, c); + err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) goto err_rq_mkey; break; @@@ -512,7 -461,7 +461,7 @@@ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
@@@ -524,23 -473,19 +473,19 @@@ rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->wqe.frags) { err = -ENOMEM; goto err_rq_wq_destroy; }
- err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu)); + err = mlx5e_init_di_list(rq, wq_sz, node); if (err) goto err_rq_frags;
- rq->mkey_be = c->mkey_be; + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key); }
- err = mlx5e_rq_set_handlers(rq, params, xsk); - if (err) - goto err_free_by_rq_type; - if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); @@@ -550,8 -495,8 +495,8 @@@ pp_params.order = 0; pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; + pp_params.nid = node; + pp_params.dev = rq->pdev; pp_params.dma_dir = rq->buff.map_dir;
/* page_pool can be used even when there is no rq->xdp_prog, @@@ -635,8 -580,6 +580,6 @@@ err_rq_frags } err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); - err_rq_xdp: - xdp_rxq_info_unreg(&rq->xdp_rxq); err_rq_xdp_prog: if (params->xdp_prog) bpf_prog_put(params->xdp_prog); @@@ -649,10 -592,12 +592,12 @@@ static void mlx5e_free_rq(struct mlx5e_ struct bpf_prog *old_prog; int i;
- old_prog = rcu_dereference_protected(rq->xdp_prog, - lockdep_is_held(&rq->priv->state_lock)); - if (old_prog) - bpf_prog_put(old_prog); + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + }
switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@@ -888,13 -833,14 +833,14 @@@ void mlx5e_free_rx_descs(struct mlx5e_r
}
- int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq) + int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq) { + struct mlx5_core_dev *mdev = rq->mdev; int err;
- err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq); + err = mlx5e_alloc_rq(params, xsk, param, node, rq); if (err) return err;
@@@ -906,28 -852,28 +852,28 @@@ if (err) goto err_destroy_rq;
- if (mlx5e_is_tls_on(c->priv) && !mlx5_accel_is_ktls_device(c->mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &c->rq.state); /* must be FPGA */ + if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev)) + __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
- if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full)) - __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state); + if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
/* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render * skb->checksum incorrect. */ - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) - __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
/* For CQE compression on striding RQ, use stride index provided by * HW if capability is supported. */ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && - MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) - __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
return 0;
@@@ -942,7 -888,10 +888,10 @@@ err_free_rq void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_trigger_irq(rq->icosq); + if (rq->icosq) + mlx5e_trigger_irq(rq->icosq); + else + napi_schedule(rq->cq.napi); }
void mlx5e_deactivate_rq(struct mlx5e_rq *rq) @@@ -954,7 -903,8 +903,8 @@@ void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - cancel_work_sync(&rq->icosq->recover_work); + if (rq->icosq) + cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@@ -1019,7 -969,7 +969,7 @@@ static int mlx5e_alloc_xdpsq(struct mlx sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->xsk_pool = xsk_pool; @@@ -1090,8 -1040,7 +1040,8 @@@ static int mlx5e_alloc_icosq(struct mlx int err;
sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->reserved_room = param->stop_room;
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@@ -1175,7 -1124,7 +1125,7 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); @@@ -1188,9 -1137,7 +1138,7 @@@ if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); sq->stop_room = param->stop_room; - sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? - mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@@ -1258,7 -1205,7 +1206,7 @@@ static int mlx5e_create_sq(struct mlx5_ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@@ -1861,14 -1808,16 +1809,16 @@@ static int mlx5e_set_tx_maxrate(struct return err; }
- void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) + static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params) { - *ccp = (struct mlx5e_create_cq_param) { - .napi = &c->napi, - .ch_stats = c->stats, - .node = cpu_to_node(c->cpu), - .ix = c->ix, - }; + int err; + + err = mlx5e_init_rxq_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); }
static int mlx5e_open_queues(struct mlx5e_channel *c, @@@ -1931,7 -1880,7 +1881,7 @@@ goto err_close_sqs; }
- err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); if (err) goto err_close_xdp_sq;
@@@ -2033,7 -1982,7 +1983,7 @@@ static int mlx5e_open_channel(struct ml c->cpu = cpu; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; @@@ -2112,314 -2061,6 +2062,6 @@@ static void mlx5e_close_channel(struct kvfree(c); }
- #define DEFAULT_FRAG_SIZE (2048) - - static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) - { - u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); - int frag_size_max = DEFAULT_FRAG_SIZE; - u32 buf_size = 0; - int i; - - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - - if (mlx5e_rx_is_linear_skb(params, xsk)) { - int frag_stride; - - frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); - frag_stride = roundup_pow_of_two(frag_stride); - - info->arr[0].frag_size = byte_count; - info->arr[0].frag_stride = frag_stride; - info->num_frags = 1; - info->wqe_bulk = PAGE_SIZE / frag_stride; - goto out; - } - - if (byte_count > PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) - frag_size_max = PAGE_SIZE; - - i = 0; - while (buf_size < byte_count) { - int frag_size = byte_count - buf_size; - - if (i < MLX5E_MAX_RX_FRAGS - 1) - frag_size = min(frag_size, frag_size_max); - - info->arr[i].frag_size = frag_size; - info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - - buf_size += frag_size; - i++; - } - info->num_frags = i; - /* number of different wqes sharing a page */ - info->wqe_bulk = 1 + (info->num_frags % 2); - - out: - info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); - info->log_num_frags = order_base_2(info->num_frags); - } - - static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) - { - int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; - - switch (wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - sz += sizeof(struct mlx5e_rx_wqe_ll); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - sz += sizeof(struct mlx5e_rx_wqe_cyc); - } - - return order_base_2(sz); - } - - static u8 mlx5e_get_rq_log_wq_sz(void *rqc) - { - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - return MLX5_GET(wq, wq, log_wq_sz); - } - - void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct 
mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *param) - { - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - int ndsegs = 1; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); - MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info); - ndsegs = param->frags_info.num_frags; - } - - MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); - MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); - MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); - MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); - MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp); - } - - static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, - struct mlx5e_rq_param *param) - { - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); - MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - } - - void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) - { - void *sqc = param->sqc; - void *wq = 
MLX5_ADDR_OF(sqc, sqc, wq); - - MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); - } - - void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_sq_param *param) - { - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - bool allow_swp; - - allow_swp = mlx5_geneve_tx_allowed(priv->mdev) || - !!MLX5_IPSEC_DEV(priv->mdev); - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - MLX5_SET(sqc, sqc, allow_swp, allow_swp); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); - param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); - mlx5e_build_tx_cq_param(priv, params, &param->cqp); - } - - static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, - struct mlx5e_cq_param *param) - { - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); - if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) - MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); - } - - void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_cq_param *param) - { - struct mlx5_core_dev *mdev = priv->mdev; - bool hw_stridx = false; - void *cqc = param->cqc; - u8 log_cq_size; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); - hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - log_cq_size = params->log_rq_mtu_frames; - } - - MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? 
- MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); - MLX5_SET(cqc, cqc, cqe_comp_en, 1); - } - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; - } - - void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) - { - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; - } - - void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) - { - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); - - mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - } - - void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) - { - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - - MLX5_SET(wq, wq, log_wq_sz, log_wq_size); - MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp); - } - - static void mlx5e_build_async_icosq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - u8 log_wq_size, - struct mlx5e_sq_param *param) - { - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - - /* async_icosq is used by XSK only if xdp_prog is active */ - if (params->xdp_prog) - param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ - MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - MLX5_SET(wq, wq, log_wq_sz, log_wq_size); - mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp); - } - - void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) - { - void *sqc = param->sqc; - void 
*wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - mlx5e_build_tx_cq_param(priv, params, &param->cqp); - } - - static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, - struct mlx5e_rq_param *rqp) - { - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, - order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc)); - default: /* MLX5_WQ_TYPE_CYCLIC */ - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - } - } - - static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev) - { - if (netdev->hw_features & NETIF_F_HW_TLS_RX) - return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - } - - static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam) - { - u8 icosq_log_wq_sz, async_icosq_log_wq_sz; - - mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); - - icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); - async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev); - - mlx5e_build_sq_param(priv, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_async_icosq_param(priv, params, async_icosq_log_wq_sz, &cparam->async_icosq); - } - int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { @@@ -2434,7 -2075,7 +2076,7 @@@ if (!chs->c || !cparam) goto err_free;
- mlx5e_build_channel_param(priv, &chs->params, cparam); + mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL;
@@@ -2446,9 -2087,8 +2088,8 @@@ goto err_close_channels; }
- if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) { - err = mlx5e_port_ptp_open(priv, &chs->params, chs->c[0]->lag_port, - &chs->port_ptp); + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); if (err) goto err_close_channels; } @@@ -2462,8 -2102,8 +2103,8 @@@ return 0;
err_close_ptp: - if (chs->port_ptp) - mlx5e_port_ptp_close(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_close(chs->ptp);
err_close_channels: for (i--; i >= 0; i--) @@@ -2483,8 -2123,8 +2124,8 @@@ static void mlx5e_activate_channels(str for (i = 0; i < chs->num; i++) mlx5e_activate_channel(chs->c[i]);
- if (chs->port_ptp) - mlx5e_ptp_activate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_activate_channel(chs->ptp); }
#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ @@@ -2511,8 -2151,8 +2152,8 @@@ static void mlx5e_deactivate_channels(s { int i;
- if (chs->port_ptp) - mlx5e_ptp_deactivate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_deactivate_channel(chs->ptp);
for (i = 0; i < chs->num; i++) mlx5e_deactivate_channel(chs->c[i]); @@@ -2522,11 -2162,10 +2163,10 @@@ void mlx5e_close_channels(struct mlx5e_ { int i;
- if (chs->port_ptp) { - mlx5e_port_ptp_close(chs->port_ptp); - chs->port_ptp = NULL; + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; } - for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]);
@@@ -2582,12 -2221,12 +2222,12 @@@ int mlx5e_create_indirect_rqt(struct ml return err; }
- int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) + int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int err; int ix;
- for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); if (unlikely(err)) goto err_destroy_rqts; @@@ -2603,11 -2242,11 +2243,11 @@@ err_destroy_rqts return err; }
- void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) + void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i;
- for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_rqt(priv, &tirs[i].rqt); }
@@@ -2690,7 -2329,8 +2330,8 @@@ static u32 mlx5e_get_direct_rqn(struct }
static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, - struct mlx5e_redirect_rqt_param rrp) + struct mlx5e_redirect_rqt_param rrp, + struct mlx5e_redirect_rqt_param *ptp_rrp) { u32 rqtn; int ix; @@@ -2716,11 -2356,17 +2357,17 @@@ rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } + if (ptp_rrp) { + rqtn = priv->ptp_tir.rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp); + } }
static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { + bool rx_ptp_support = priv->profile->rx_ptp_support; + struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL; struct mlx5e_redirect_rqt_param rrp = { .is_rss = true, { @@@ -2730,12 -2376,22 +2377,22 @@@ } }, }; + struct mlx5e_redirect_rqt_param ptp_rrp; + + if (rx_ptp_support) { + u32 ptp_rqn;
- mlx5e_redirect_rqts(priv, rrp); + ptp_rrp.is_rss = false; + ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ? + priv->drop_rq.rqn : ptp_rqn; + ptp_rrp_p = &ptp_rrp; + } + mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p); }
static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) { + bool rx_ptp_support = priv->profile->rx_ptp_support; struct mlx5e_redirect_rqt_param drop_rrp = { .is_rss = false, { @@@ -2743,7 -2399,7 +2400,7 @@@ }, };
- mlx5e_redirect_rqts(priv, drop_rrp); + mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL); }
static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { @@@ -3032,6 -2688,8 +2689,8 @@@ static int mlx5e_update_netdev_queues(s nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; + if (priv->channels.params.ptp_rx) + num_rxqs++;
mlx5e_netdev_set_tcs(netdev, nch, ntc);
@@@ -3117,11 -2775,14 +2776,14 @@@ static void mlx5e_build_txq_maps(struc } }
- if (!priv->channels.port_ptp) + if (!priv->channels.ptp) + return; + + if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) return;
for (tc = 0; tc < num_tc; tc++) { - struct mlx5e_port_ptp *c = priv->channels.port_ptp; + struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq; @@@ -3395,7 -3056,7 +3057,7 @@@ int mlx5e_open_drop_rq(struct mlx5e_pri struct mlx5e_cq *cq = &drop_rq->cq; int err;
- mlx5e_build_drop_rq_param(priv, &rq_param); + mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); if (err) @@@ -3443,10 -3104,10 +3105,10 @@@ int mlx5e_create_tis(struct mlx5_core_d { void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
if (MLX5_GET(tisc, tisc, tls_en)) - MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); @@@ -3516,7 -3177,7 +3178,7 @@@ static void mlx5e_cleanup_nic_tx(struc static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, @@@ -3608,7 -3269,7 +3270,7 @@@ err_destroy_inner_tirs return err; }
- int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) + int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { struct mlx5e_tir *tir; void *tirc; @@@ -3622,7 -3283,7 +3284,7 @@@ if (!in) return -ENOMEM;
- for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { memset(in, 0, inlen); tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); @@@ -3660,11 -3321,11 +3322,11 @@@ void mlx5e_destroy_indirect_tirs(struc mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); }
- void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) + void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i;
- for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_tir(priv->mdev, &tirs[i]); }
@@@ -3791,8 -3452,16 +3453,16 @@@ static int mlx5e_setup_tc(struct net_de void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; int err;
+ if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + switch (type) { case TC_SETUP_BLOCK: { struct flow_block_offload *f = type_data; @@@ -3837,15 -3506,22 +3507,22 @@@ void mlx5e_fold_sw_stats64(struct mlx5e s->tx_dropped += sq_stats->dropped; } } - if (priv->port_ptp_opened) { + if (priv->tx_ptp_opened) { for (i = 0; i < priv->max_opened_tc; i++) { - struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i]; + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; s->tx_dropped += sq_stats->dropped; } } + if (priv->rx_ptp_opened) { + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->multicast += rq_stats->mcast_packets; + } }
void @@@ -3854,6 -3530,9 +3531,9 @@@ mlx5e_get_stats(struct net_device *dev struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ if (!netif_device_present(dev)) + return; + /* In switchdev mode, monitor counters doesn't monitor * rx/tx stats of 802_3. The update stats mechanism * should keep the 802_3 layout counters updated @@@ -3895,11 -3574,19 +3575,19 @@@ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; }
+ static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) + { + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); + } + static void mlx5e_set_rx_mode(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); }
static int mlx5e_set_mac(struct net_device *netdev, void *addr) @@@ -3914,7 -3601,7 +3602,7 @@@ ether_addr_copy(netdev->dev_addr, saddr->sa_data); netif_addr_unlock_bh(netdev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
return 0; } @@@ -4136,7 -3823,8 +3824,8 @@@ static netdev_features_t mlx5e_fix_feat
mutex_lock(&priv->state_lock); params = &priv->channels.params; - if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + if (!priv->fs.vlan || + !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) { /* HW strips the outer C-tag header, this is a problem * for S-tag traffic. */ @@@ -4219,7 -3907,7 +3908,7 @@@ int mlx5e_change_mtu(struct net_device
new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_channels.params); if (err) goto out;
@@@ -4283,9 -3971,18 +3972,18 @@@ static int mlx5e_change_nic_mtu(struct return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); }
+ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) + { + bool set = *(bool *)ctx; + + return mlx5e_ptp_rx_manage_fs(priv, set); + } + int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { + struct mlx5e_channels new_channels = {}; struct hwtstamp_config config; + bool rx_cqe_compress_def; int err;
if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || @@@ -4305,11 -4002,13 +4003,13 @@@ }
mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + new_channels.params.ptp_rx = false; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@@ -4326,15 -4025,7 +4026,7 @@@ case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) - netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } + new_channels.params.ptp_rx = rx_cqe_compress_def; config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: @@@ -4342,6 -4033,20 +4034,20 @@@ return -ERANGE; }
+ if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + goto out; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, + &new_channels.params.ptp_rx); + if (err) { + mutex_unlock(&priv->state_lock); + return err; + } + out: memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock);
@@@ -4452,6 -4157,9 +4158,9 @@@ static int mlx5e_set_vf_link_state(stru struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev;
+ if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, mlx5_ifla_link2vport(link_state)); } @@@ -4463,6 -4171,9 +4172,9 @@@ int mlx5e_get_vf_config(struct net_devi struct mlx5_core_dev *mdev = priv->mdev; int err;
+ if (!netif_device_present(dev)) + return -EOPNOTSUPP; + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); if (err) return err; @@@ -4479,6 -4190,32 +4191,32 @@@ int mlx5e_get_vf_stats(struct net_devic return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } + + static bool + mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if (!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); + } + + static int + mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); + } #endif
static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) @@@ -4837,6 -4574,8 +4575,8 @@@ const struct net_device_ops mlx5e_netde .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif .ndo_get_devlink_port = mlx5e_get_devlink_port, }; @@@ -4850,93 -4589,6 +4590,6 @@@ void mlx5e_build_default_indir_rqt(u32 indirection_rqt[i] = i % num_channels; }
- static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) - { - u32 link_speed = 0; - u32 pci_bw = 0; - - mlx5e_port_max_linkspeed(mdev, &link_speed); - pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); - mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - - #define MLX5E_SLOW_PCI_RATIO (2) - - return link_speed && pci_bw && - link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; - } - - static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) - { - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; - } - - static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) - { - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; - } - - static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) - { - return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? 
- DIM_CQ_PERIOD_MODE_START_FROM_CQE : - DIM_CQ_PERIOD_MODE_START_FROM_EQE; - } - - void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) - { - if (params->tx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); - } else { - params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); - } - } - - void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) - { - if (params->rx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); - } else { - params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); - } - } - - void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) - { - mlx5e_reset_tx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, - params->tx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - } - - void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) - { - mlx5e_reset_rx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - } - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@@ -4949,25 -4601,6 +4602,6 @@@ return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); }
- void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) - { - /* Prefer Striding RQ, unless any of the following holds: - * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. - * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. - * - * No XSK params: checking the availability of striding RQ in general. - */ - if (!slow_pci_heuristic(mdev) && - mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || - !mlx5e_rx_is_linear_skb(params, NULL))) - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); - mlx5e_set_rq_type(mdev, params); - mlx5e_init_rq_type_params(mdev, params); - } - void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels) { @@@ -5298,10 -4931,6 +4932,6 @@@ static int mlx5e_nic_init(struct mlx5_c if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
- err = mlx5e_devlink_port_register(priv); - if (err) - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - mlx5e_health_create_reporters(priv);
return 0; @@@ -5310,7 -4939,6 +4940,6 @@@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_devlink_port_unregister(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } @@@ -5318,6 -4946,7 +4947,7 @@@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err;
mlx5e_create_q_counters(priv); @@@ -5332,7 -4961,7 +4962,7 @@@ if (err) goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts;
@@@ -5340,22 -4969,30 +4970,30 @@@ if (err) goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs;
- err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_direct_tirs;
- err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_xsk_rqts;
+ err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_xsk_tirs; + + err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_ptp_rqt; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_xsk_tirs; + goto err_destroy_ptp_direct_tir; }
err = mlx5e_tc_nic_init(priv); @@@ -5376,16 -5013,20 +5014,20 @@@ err_tc_nic_cleanup mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); + err_destroy_ptp_direct_tir: + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); + err_destroy_ptp_rqt: + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); err_destroy_xsk_tirs: - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); err_destroy_xsk_rqts: - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@@ -5397,14 -5038,18 +5039,18 @@@ err_destroy_q_counters
static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@@ -5450,7 -5095,7 +5096,7 @@@ static void mlx5e_nic_enable(struct mlx return; mlx5e_dcbnl_init_app(priv);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
rtnl_lock(); if (netif_running(netdev)) @@@ -5473,7 -5118,7 +5119,7 @@@ static void mlx5e_nic_disable(struct ml netif_device_detach(priv->netdev); rtnl_unlock();
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) @@@ -5512,6 -5157,7 +5158,7 @@@ static const struct mlx5e_profile mlx5e .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, + .rx_ptp_support = true, };
/* mlx5e generic netdev management API (move to en_common.c) */ @@@ -5746,6 -5392,11 +5393,11 @@@ rollback return err; }
+ void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) + { + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); + } + void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; @@@ -5828,10 -5479,17 +5480,17 @@@ static int mlx5e_probe(struct auxiliary
priv->profile = profile; priv->ppriv = NULL; + + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_destroy_netdev; + } + err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_destroy_netdev; + goto err_devlink_cleanup; }
err = mlx5e_resume(adev); @@@ -5849,12 -5507,15 +5508,15 @@@ mlx5e_devlink_port_type_eth_set(priv);
mlx5e_dcbnl_init_app(priv); + mlx5_uplink_netdev_set(mdev, netdev); return 0;
err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); + err_devlink_cleanup: + mlx5e_devlink_port_unregister(priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); return err; @@@ -5869,6 -5530,7 +5531,7 @@@ static void mlx5e_remove(struct auxilia unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); + mlx5e_devlink_port_unregister(priv); mlx5e_destroy_netdev(priv); }
@@@ -5894,18 -5556,18 +5557,18 @@@ int mlx5e_init(void
mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); - ret = mlx5e_rep_init(); + ret = auxiliary_driver_register(&mlx5e_driver); if (ret) return ret;
- ret = auxiliary_driver_register(&mlx5e_driver); + ret = mlx5e_rep_init(); if (ret) - mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); return ret; }
void mlx5e_cleanup(void) { - auxiliary_driver_unregister(&mlx5e_driver); mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8d39bfee84a9,9ef8e4a671a7..a9d33682d50e --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@@ -40,10 -40,12 +40,12 @@@ #include "eswitch.h" #include "en.h" #include "en_rep.h" + #include "en/params.h" #include "en/txrx.h" #include "en_tc.h" #include "en/rep/tc.h" #include "en/rep/neigh.h" + #include "en/devlink.h" #include "fs_core.h" #include "lib/mlx5.h" #define CREATE_TRACE_POINTS @@@ -69,16 -71,6 +71,6 @@@ static void mlx5e_rep_get_drvinfo(struc fw_rev_sub(mdev), mdev->board_id); }
- static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) - { - struct mlx5e_priv *priv = netdev_priv(dev); - - mlx5e_rep_get_drvinfo(dev, drvinfo); - strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev), - sizeof(drvinfo->bus_info)); - } - static const struct counter_desc sw_rep_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, @@@ -285,46 -277,6 +277,6 @@@ static u32 mlx5e_rep_get_rxfh_indir_siz return mlx5e_ethtool_get_rxfh_indir_size(priv); }
- static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev, - struct ethtool_pause_stats *stats) - { - struct mlx5e_priv *priv = netdev_priv(netdev); - - mlx5e_stats_pause_get(priv, stats); - } - - static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) - { - struct mlx5e_priv *priv = netdev_priv(netdev); - - mlx5e_ethtool_get_pauseparam(priv, pauseparam); - } - - static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) - { - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_set_pauseparam(priv, pauseparam); - } - - static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *link_ksettings) - { - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); - } - - static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *link_ksettings) - { - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); - } - static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@@ -344,34 -296,6 +296,6 @@@ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, };
- static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USE_ADAPTIVE, - .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_strings = mlx5e_rep_get_strings, - .get_sset_count = mlx5e_rep_get_sset_count, - .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, - .get_ringparam = mlx5e_rep_get_ringparam, - .set_ringparam = mlx5e_rep_set_ringparam, - .get_channels = mlx5e_rep_get_channels, - .set_channels = mlx5e_rep_set_channels, - .get_coalesce = mlx5e_rep_get_coalesce, - .set_coalesce = mlx5e_rep_set_coalesce, - .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings, - .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, - .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, - .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, - .get_rxfh = mlx5e_get_rxfh, - .set_rxfh = mlx5e_set_rxfh, - .get_rxnfc = mlx5e_get_rxnfc, - .set_rxnfc = mlx5e_set_rxnfc, - .get_pause_stats = mlx5e_uplink_rep_get_pause_stats, - .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, - .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, - }; - static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { @@@ -522,7 -446,7 +446,7 @@@ bool mlx5e_is_uplink_rep(struct mlx5e_p return (rep->vport == MLX5_VPORT_UPLINK); }
- static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) + bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@@ -542,8 -466,8 +466,8 @@@ mlx5e_get_sw_stats64(const struct net_d return 0; }
- static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) + int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@@ -568,34 -492,6 +492,6 @@@ static int mlx5e_rep_change_mtu(struct return mlx5e_change_mtu(netdev, new_mtu, NULL); }
- static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) - { - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); - } - - static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) - { - struct sockaddr *saddr = addr; - - if (!is_valid_ether_addr(saddr->sa_data)) - return -EADDRNOTAVAIL; - - ether_addr_copy(netdev->dev_addr, saddr->sa_data); - return 0; - } - - static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, - __be16 vlan_proto) - { - netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); - - if (vlan != 0) - return -EOPNOTSUPP; - - /* allow setting 0-vid for compatibility with libvirt */ - return 0; - } - static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@@ -641,29 -537,10 +537,10 @@@ static const struct net_device_ops mlx5 .ndo_change_carrier = mlx5e_rep_change_carrier, };
- static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { - .ndo_open = mlx5e_open, - .ndo_stop = mlx5e_close, - .ndo_start_xmit = mlx5e_xmit, - .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, - .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, - .ndo_get_stats64 = mlx5e_get_stats, - .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, - .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_uplink_rep_change_mtu, - .ndo_features_check = mlx5e_features_check, - .ndo_set_vf_mac = mlx5e_set_vf_mac, - .ndo_set_vf_rate = mlx5e_set_vf_rate, - .ndo_get_vf_config = mlx5e_get_vf_config, - .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, - .ndo_set_features = mlx5e_set_features, - }; - bool mlx5e_eswitch_uplink_rep(struct net_device *netdev) { - return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep; + return netdev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_is_uplink_rep(netdev_priv(netdev)); }
bool mlx5e_eswitch_vf_rep(struct net_device *netdev) @@@ -713,26 -590,15 +590,15 @@@ static void mlx5e_build_rep_params(stru }
static void mlx5e_build_rep_netdev(struct net_device *netdev, - struct mlx5_core_dev *mdev, - struct mlx5_eswitch_rep *rep) + struct mlx5_core_dev *mdev) { SET_NETDEV_DEV(netdev, mdev->device); - if (rep->vport == MLX5_VPORT_UPLINK) { - netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; - /* we want a persistent mac for the uplink rep */ - mlx5_query_mac_address(mdev, netdev->dev_addr); - netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; - mlx5e_dcbnl_build_rep_netdev(netdev); - } else { - netdev->netdev_ops = &mlx5e_netdev_ops_rep; - eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; - } + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->watchdog_timeo = 15 * HZ;
- netdev->features |= NETIF_F_NETNS_LOCAL; - #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; #endif @@@ -744,12 -610,9 +610,9 @@@ netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM;
- if (rep->vport == MLX5_VPORT_UPLINK) - netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - else - netdev->features |= NETIF_F_VLAN_CHALLENGED; - netdev->features |= netdev->hw_features; + netdev->features |= NETIF_F_VLAN_CHALLENGED; + netdev->features |= NETIF_F_NETNS_LOCAL; }
static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@@ -890,6 -753,7 +753,7 @@@ int mlx5e_rep_bond_update(struct mlx5e_ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err;
mlx5e_init_l2_addr(priv); @@@ -904,7 -768,7 +768,7 @@@ if (err) goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts;
@@@ -912,7 -776,7 +776,7 @@@ if (err) goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs;
@@@ -937,11 -801,11 +801,11 @@@ err_destroy_root_ft err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@@ -951,13 -815,15 +815,15 @@@
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@@ -1107,15 -973,22 +973,23 @@@ static void mlx5e_uplink_rep_enable(str
mlx5e_rep_tc_enable(priv);
- mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, - 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); + if (MLX5_CAP_GEN(mdev, uplink_follow)) + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); mlx5e_rep_neigh_init(rpriv); + + netdev->wanted_features |= NETIF_F_HW_TC; + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); }
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) @@@ -1123,6 -996,12 +997,12 @@@ struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev;
+ rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); @@@ -1183,6 -1062,7 +1063,7 @@@ static const struct mlx5e_profile mlx5e .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, + .rx_ptp_support = false, };
static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@@ -1199,33 -1079,65 +1080,65 @@@ .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = MLX5E_MAX_NUM_TC, - .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + /* XSK is needed so we can replace profile with NIC netdev */ + .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, + .rx_ptp_support = false, };
/* e-Switch vport representors */ static int - mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + { + struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev)); + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct devlink_port *dl_port; + int err; + + rpriv->netdev = priv->netdev; + + err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, + rpriv); + if (err) + return err; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, rpriv->netdev); + + return 0; + } + + static void + mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) + { + struct net_device *netdev = rpriv->netdev; + struct devlink_port *dl_port; + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + dev = priv->mdev; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); + mlx5e_netdev_attach_nic_profile(priv); + } + + static int + mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); const struct mlx5e_profile *profile; - struct mlx5e_rep_priv *rpriv; struct devlink_port *dl_port; struct net_device *netdev; struct mlx5e_priv *priv; unsigned int txqs, rxqs; int nch, err;
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); - if (!rpriv) - return -ENOMEM; - - /* rpriv->rep to be looked up when profile->init() is called */ - rpriv->rep = rep; - - profile = (rep->vport == MLX5_VPORT_UPLINK) ? - &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; - + profile = &mlx5e_rep_profile; nch = mlx5e_get_max_num_channels(dev); txqs = nch * profile->max_tc; rxqs = nch * profile->rq_groups; @@@ -1234,21 -1146,11 +1147,11 @@@ mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", rep->vport); - kfree(rpriv); return -EINVAL; }
- mlx5e_build_rep_netdev(netdev, dev, rep); - + mlx5e_build_rep_netdev(netdev, dev); rpriv->netdev = netdev; - rep->rep_data[REP_ETH].priv = rpriv; - INIT_LIST_HEAD(&rpriv->vport_sqs_list); - - if (rep->vport == MLX5_VPORT_UPLINK) { - err = mlx5e_create_mdev_resources(dev); - if (err) - goto err_destroy_netdev; - }
priv = netdev_priv(netdev); priv->profile = profile; @@@ -1256,7 -1158,7 +1159,7 @@@ err = profile->init(dev, netdev); if (err) { netdev_warn(netdev, "rep profile init failed, %d\n", err); - goto err_destroy_mdev_resources; + goto err_destroy_netdev; }
err = mlx5e_attach_netdev(netdev_priv(netdev)); @@@ -1286,13 -1188,34 +1189,34 @@@ err_detach_netdev err_cleanup_profile: priv->profile->cleanup(priv);
- err_destroy_mdev_resources: - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_destroy_mdev_resources(dev); - err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); - kfree(rpriv); + return err; + } + + static int + mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + { + struct mlx5e_rep_priv *rpriv; + int err; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + rep->rep_data[REP_ETH].priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + + if (rep->vport == MLX5_VPORT_UPLINK) + err = mlx5e_vport_uplink_rep_load(dev, rep); + else + err = mlx5e_vport_vf_rep_load(dev, rep); + + if (err) + kfree(rpriv); + return err; }
@@@ -1306,15 -1229,19 +1230,19 @@@ mlx5e_vport_rep_unload(struct mlx5_eswi struct devlink_port *dl_port; void *ppriv = priv->ppriv;
+ if (rep->vport == MLX5_VPORT_UPLINK) { + mlx5e_vport_uplink_rep_unload(rpriv); + goto free_ppriv; + } + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); if (dl_port) devlink_port_type_clear(dl_port); unregister_netdev(netdev); mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); + free_ppriv: kfree(ppriv); /* mlx5e_rep_priv */ }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 88a01c59ce61,f67e51d8291a..ae0570ea08bf --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@@ -116,6 -116,7 +116,6 @@@ static const struct counter_desc sw_sta #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, @@@ -179,6 -180,8 +179,6 @@@ #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_ctx) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_del) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) }, @@@ -339,6 -342,8 +339,6 @@@ static void mlx5e_stats_grp_sw_update_s #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; - s->rx_tls_ctx += rq_stats->tls_ctx; - s->rx_tls_del += rq_stats->tls_del; s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt; s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start; s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end; @@@ -385,6 -390,7 +385,6 @@@ static void mlx5e_stats_grp_sw_update_s #ifdef CONFIG_MLX5_EN_TLS s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; - s->tx_tls_ctx += sq_stats->tls_ctx; s->tx_tls_ooo += 
sq_stats->tls_ooo; s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; s->tx_tls_dump_packets += sq_stats->tls_dump_packets; @@@ -401,13 -407,21 +401,21 @@@ static void mlx5e_stats_grp_sw_update_s { int i;
- if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return;
- mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->port_ptp_stats.ch); + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
- for (i = 0; i < priv->max_opened_tc; i++) { - mlx5e_stats_grp_sw_update_stats_sq(s, &priv->port_ptp_stats.sq[i]); + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + if (priv->rx_ptp_opened) { + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq);
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ barrier(); @@@ -1616,6 -1630,8 +1624,6 @@@ static const struct counter_desc rq_sta #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_ctx) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_del) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) }, @@@ -1642,6 -1658,7 +1650,6 @@@ static const struct counter_desc sq_sta #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@@ -1751,6 -1768,38 +1759,38 @@@ static const struct counter_desc ptp_cq { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, };
+ static const struct counter_desc ptp_rq_stats_desc[] = { + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { 
MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, + }; + static const struct counter_desc qos_sq_stats_desc[] = { { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, @@@ -1767,6 -1816,7 +1807,6 @@@ #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@@ -1795,6 -1845,7 +1835,7 @@@ #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) + #define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc) #define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) @@@ -1841,32 -1892,46 +1882,46 @@@ static MLX5E_DECLARE_STATS_GRP_OP_UPDAT
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { - return priv->port_ptp_opened ? - NUM_PTP_CH_STATS + - ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) : - 0; + int num = NUM_PTP_CH_STATS; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return 0; + + if (priv->tx_ptp_opened) + num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc; + if (priv->rx_ptp_opened) + num += NUM_PTP_RQ_STATS; + + return num; }
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) { int i, tc;
- if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx;
for (i = 0; i < NUM_PTP_CH_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, ptp_ch_stats_desc[i].format);
- for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_sq_stats_desc[i].format, tc); + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_sq_stats_desc[i].format, tc);
- for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_cq_stats_desc[i].format, tc); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_cq_stats_desc[i].format, tc); + ptp_rq_stats_desc[i].format); + } return idx; }
@@@ -1874,26 -1939,33 +1929,33 @@@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_ { int i, tc;
- if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx;
for (i = 0; i < NUM_PTP_CH_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.ch, + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, ptp_ch_stats_desc, i);
- for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.sq[tc], - ptp_sq_stats_desc, i); - - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i); + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.cq[tc], - ptp_cq_stats_desc, i); - + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, + ptp_rq_stats_desc, i); + } return idx; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index adf9b7b8b712,ca398eac09c1..21d3b8747f93 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@@ -54,6 -54,7 +54,7 @@@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) + #define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
@@@ -191,6 -192,7 +192,6 @@@ struct mlx5e_sw_stats #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; - u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_dump_packets; u64 tx_tls_dump_bytes; @@@ -201,6 -203,8 +202,6 @@@
u64 rx_tls_decrypted_packets; u64 rx_tls_decrypted_bytes; - u64 rx_tls_ctx; - u64 rx_tls_del; u64 rx_tls_resync_req_pkt; u64 rx_tls_resync_req_start; u64 rx_tls_resync_req_end; @@@ -331,6 -335,8 +332,6 @@@ struct mlx5e_rq_stats #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; - u64 tls_ctx; - u64 tls_del; u64 tls_resync_req_pkt; u64 tls_resync_req_start; u64 tls_resync_req_end; @@@ -359,6 -365,7 +360,6 @@@ struct mlx5e_sq_stats #ifdef CONFIG_MLX5_EN_TLS u64 tls_encrypted_packets; u64 tls_encrypted_bytes; - u64 tls_ctx; u64 tls_ooo; u64 tls_dump_packets; u64 tls_dump_bytes; diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d4a2f8d1ee9f,ac92ffc8a5d3..b64c2fcc9957 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -40,7 -40,6 +40,6 @@@ #include "eswitch.h" #include "esw/indir_table.h" #include "esw/acl/ofld.h" - #include "esw/indir_table.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@@ -48,6 -47,7 +47,7 @@@ #include "lib/eq.h" #include "lib/fs_chains.h" #include "en_tc.h" + #include "en/mapping.h"
/* There are two match-all miss flows, one for unicast dst mac and * one for multicast. @@@ -55,184 -55,14 +55,14 @@@ #define MLX5_ESW_MISS_FLOWS (2) #define UPLINK_REP_INDEX 0
- /* Per vport tables */ - - #define MLX5_ESW_VPORT_TABLE_SIZE 128 - - /* This struct is used as a key to the hash table and we need it to be packed - * so hash result is consistent - */ - struct mlx5_vport_key { - u32 chain; - u16 prio; - u16 vport; - u16 vhca_id; - } __packed; - - struct mlx5_vport_tbl_attr { - u16 chain; - u16 prio; - u16 vport; - }; - - struct mlx5_vport_table { - struct hlist_node hlist; - struct mlx5_flow_table *fdb; - u32 num_rules; - struct mlx5_vport_key key; - }; - + #define MLX5_ESW_VPORT_TBL_SIZE 128 #define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4
- static struct mlx5_flow_table * - esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns) - { - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *fdb; - - ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS; - ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE; - ft_attr.prio = FDB_PER_VPORT; - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", - PTR_ERR(fdb)); - } - - return fdb; - } - - static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, - struct mlx5_vport_tbl_attr *attr, - struct mlx5_vport_key *key) - { - key->vport = attr->vport; - key->chain = attr->chain; - key->prio = attr->prio; - key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); - return jhash(key, sizeof(*key), 0); - } - - /* caller must hold vports.lock */ - static struct mlx5_vport_table * - esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) - { - struct mlx5_vport_table *e; - - hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) - if (!memcmp(&e->key, skey, sizeof(*skey))) - return e; - - return NULL; - } - - static void - esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) - { - struct mlx5_vport_table *e; - struct mlx5_vport_key key; - u32 hkey; - - mutex_lock(&esw->fdb_table.offloads.vports.lock); - hkey = flow_attr_to_vport_key(esw, attr, &key); - e = esw_vport_tbl_lookup(esw, &key, hkey); - if (!e || --e->num_rules) - goto out; - - hash_del(&e->hlist); - mlx5_destroy_flow_table(e->fdb); - kfree(e); - out: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - } - - static struct mlx5_flow_table * - esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) - { - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *fdb; - struct mlx5_vport_table *e; - struct mlx5_vport_key skey; - u32 hkey; - - 
mutex_lock(&esw->fdb_table.offloads.vports.lock); - hkey = flow_attr_to_vport_key(esw, attr, &skey); - e = esw_vport_tbl_lookup(esw, &skey, hkey); - if (e) { - e->num_rules++; - goto out; - } - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) { - fdb = ERR_PTR(-ENOMEM); - goto err_alloc; - } - - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!ns) { - esw_warn(dev, "Failed to get FDB namespace\n"); - fdb = ERR_PTR(-ENOENT); - goto err_ns; - } - - fdb = esw_vport_tbl_create(esw, ns); - if (IS_ERR(fdb)) - goto err_ns; - - e->fdb = fdb; - e->num_rules = 1; - e->key = skey; - hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); - out: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - return e->fdb; - - err_ns: - kfree(e); - err_alloc: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - return fdb; - } - - int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) - { - struct mlx5_vport_tbl_attr attr; - struct mlx5_flow_table *fdb; - struct mlx5_vport *vport; - int i; - - attr.chain = 0; - attr.prio = 1; - mlx5_esw_for_all_vports(esw, i, vport) { - attr.vport = vport->vport; - fdb = esw_vport_tbl_get(esw, &attr); - if (IS_ERR(fdb)) - goto out; - } - return 0; - - out: - mlx5_esw_vport_tbl_put(esw); - return PTR_ERR(fdb); - } - - void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) - { - struct mlx5_vport_tbl_attr attr; - struct mlx5_vport *vport; - int i; - - attr.chain = 0; - attr.prio = 1; - mlx5_esw_for_all_vports(esw, i, vport) { - attr.vport = vport->vport; - esw_vport_tbl_put(esw, &attr); - } - } - - /* End: Per vport tables */ + static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE, + .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, + .flags = 0, + };
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) @@@ -256,6 -86,26 +86,26 @@@ mlx5_eswitch_set_rule_flow_source(struc MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; }
+ /* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits + * are not needed as well in the following process. So clear them all for simplicity. + */ + void + mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec) + { + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + void *misc2; + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2))) + spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2; + } + } + static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@@ -326,6 -176,19 +176,19 @@@ esw_cleanup_decap_indir(struct mlx5_esw true); }
+ static int + esw_setup_sampler_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_esw_flow_attr *esw_attr, + int i) + { + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest[i].sampler_id = esw_attr->sample->sampler_id; + + return 0; + } + static int esw_setup_ft_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@@ -537,14 -400,6 +400,14 @@@ esw_setup_vport_dests(struct mlx5_flow_ return i; }
+static bool +esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_GEN(esw->dev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); +} + static int esw_setup_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@@ -558,10 -413,15 +421,13 @@@ int err = 0;
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && - MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && - mlx5_eswitch_vport_match_metadata_enabled(esw) && - MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + esw_src_port_rewrite_supported(esw)) attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
- if (attr->dest_ft) { + if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { + esw_setup_sampler_dest(dest, flow_act, esw_attr, *i); + (*i)++; + } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { @@@ -664,12 -524,16 +530,16 @@@ mlx5_eswitch_add_offloaded_rule(struct if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr;
- if (split) { + /* esw_attr->sample is allocated only when there is a sample action */ + if (esw_attr->sample && esw_attr->sample->sample_default_tbl) { + fdb = esw_attr->sample->sample_default_tbl; + } else if (split) { fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
- fdb = esw_vport_tbl_get(esw, &fwd_attr); + fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); } else { if (attr->chain || attr->prio) fdb = mlx5_chains_get_table(chains, attr->chain, @@@ -701,7 -565,7 +571,7 @@@
err_add_rule: if (split) - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: @@@ -734,7 -598,8 +604,8 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; - fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr); + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@@ -779,7 -644,7 +650,7 @@@ return rule; err_chain_src_rewrite: esw_put_dest_tables_loop(esw, attr, 0, i); - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_get_fast: @@@ -814,15 -679,16 +685,16 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; }
if (fwd_rule) { - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count); } else { if (split) - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); esw_cleanup_dests(esw, attr); @@@ -1453,14 -1319,14 +1325,14 @@@ esw_add_restore_rule(struct mlx5_eswitc if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) return ERR_PTR(-EOPNOTSUPP);
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM);
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - ESW_CHAIN_TAG_METADATA_MASK); + ESW_REG_C0_USER_DATA_METADATA_MASK); misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); @@@ -1476,7 -1342,7 +1348,7 @@@ dest.ft = esw->offloads.ft_offloads;
flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - kfree(spec); + kvfree(spec);
if (IS_ERR(flow_rule)) esw_warn(esw->dev, @@@ -1486,12 -1352,6 +1358,6 @@@ return flow_rule; }
- u32 - esw_get_max_restore_tag(struct mlx5_eswitch *esw) - { - return ESW_CHAIN_TAG_METADATA_MASK; - } - #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32
@@@ -1521,6 -1381,44 +1387,44 @@@ static void esw_set_flow_group_source_p }
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + static void esw_vport_tbl_put(struct mlx5_eswitch *esw) + { + struct mlx5_vport_tbl_attr attr; + struct mlx5_vport *vport; + int i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + mlx5_esw_vporttbl_put(esw, &attr); + } + } + + static int esw_vport_tbl_get(struct mlx5_eswitch *esw) + { + struct mlx5_vport_tbl_attr attr; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + int i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fdb = mlx5_esw_vporttbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + + out: + esw_vport_tbl_put(esw); + return PTR_ERR(fdb); + } + #define fdb_modify_header_fwd_to_table_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) @@@ -1570,7 -1468,7 +1474,7 @@@ esw_chains_create(struct mlx5_eswitch * attr.max_ft_sz = fdb_max; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; - attr.max_restore_tag = esw_get_max_restore_tag(esw); + attr.mapping = esw->offloads.reg_c0_obj_pool;
chains = mlx5_chains_create(dev, &attr); if (IS_ERR(chains)) { @@@ -1598,7 -1496,7 +1502,7 @@@
/* Open level 1 for split fdb rules now if prios isn't supported */ if (!mlx5_chains_prios_supported(chains)) { - err = mlx5_esw_vport_tbl_get(esw); + err = esw_vport_tbl_get(esw); if (err) goto level_1_err; } @@@ -1622,7 -1520,7 +1526,7 @@@ static voi esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) { if (!mlx5_chains_prios_supported(chains)) - mlx5_esw_vport_tbl_put(esw); + esw_vport_tbl_put(esw); mlx5_chains_put_table(chains, 0, 1, 0); mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); mlx5_chains_destroy(chains); @@@ -1722,40 -1620,36 +1626,40 @@@ static int esw_create_offloads_fdb_tabl } esw->fdb_table.offloads.send_to_vport_grp = g;
- /* meta send to vport */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); + if (esw_src_port_rewrite_supported(esw)) { + /* meta send to vport */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); - - num_vfs = esw->esw_funcs.num_vfs; - if (num_vfs) { - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1); - ix += num_vfs; - - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", - err); - goto send_vport_meta_err; + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + num_vfs = esw->esw_funcs.num_vfs; + if (num_vfs) { + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, ix + num_vfs - 1); + ix += num_vfs; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", + err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); + if (err) + goto meta_rule_err; } - esw->fdb_table.offloads.send_to_vport_meta_grp = g; - - err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); - if (err) - goto meta_rule_err; }
if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { @@@ -1865,6 -1759,7 +1769,7 @@@ static void esw_destroy_offloads_fdb_ta /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); }
static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@@ -2067,7 -1962,7 +1972,7 @@@ static int esw_create_restore_table(str goto out_free; }
- ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS; ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { err = PTR_ERR(ft); @@@ -2082,7 -1977,7 +1987,7 @@@ misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - ESW_CHAIN_TAG_METADATA_MASK); + ESW_REG_C0_USER_DATA_METADATA_MASK); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft_attr.max_fte - 1); @@@ -2270,9 -2165,11 +2175,11 @@@ int esw_offloads_load_rep(struct mlx5_e if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0;
- err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); - if (err) - return err; + if (vport_num != MLX5_VPORT_UPLINK) { + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + }
err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) @@@ -2280,7 -2177,8 +2187,8 @@@ return err;
load_err: - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); return err; }
@@@ -2290,7 -2188,9 +2198,9 @@@ void esw_offloads_unload_rep(struct mlx return;
mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); }
#define ESW_OFFLOADS_DEVCOM_PAIR (0) @@@ -2565,6 -2465,9 +2475,9 @@@ static int esw_create_uplink_offloads_a struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return PTR_ERR(vport); + return esw_vport_create_offloads_acl_tables(esw, vport); }
@@@ -2573,6 -2476,9 +2486,9 @@@ static void esw_destroy_uplink_offloads struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return; + esw_vport_destroy_offloads_acl_tables(esw, vport); }
@@@ -2584,6 -2490,7 +2500,7 @@@ static int esw_offloads_steering_init(s memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0);
indir = mlx5_esw_indir_table_init(); if (IS_ERR(indir)) { @@@ -2728,6 -2635,7 +2645,7 @@@ static int mlx5_esw_host_number_init(st
int esw_offloads_enable(struct mlx5_eswitch *esw) { + struct mapping_ctx *reg_c0_obj_pool; struct mlx5_vport *vport; int err, i;
@@@ -2755,6 -2663,15 +2673,15 @@@ if (err) goto err_vport_metadata;
+ reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + if (IS_ERR(reg_c0_obj_pool)) { + err = PTR_ERR(reg_c0_obj_pool); + goto err_pool; + } + esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool; + err = esw_offloads_steering_init(esw); if (err) goto err_steering_init; @@@ -2781,6 -2698,8 +2708,8 @@@ err_vports err_uplink: esw_offloads_steering_cleanup(esw); err_steering_init: + mapping_destroy(reg_c0_obj_pool); + err_pool: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_metadata_uninit(esw); @@@ -2819,6 -2738,7 +2748,7 @@@ void esw_offloads_disable(struct mlx5_e esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); @@@ -2925,8 -2845,14 +2855,14 @@@ int mlx5_devlink_eswitch_mode_set(struc if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL;
- mutex_lock(&esw->mode_lock); - cur_mlx5_mode = esw->mode; + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + return err; + } + cur_mlx5_mode = err; + err = 0; + if (cur_mlx5_mode == mlx5_mode) goto unlock;
@@@ -2938,7 -2864,7 +2874,7 @@@ err = -EINVAL;
unlock: - mutex_unlock(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; }
@@@ -2951,14 -2877,14 +2887,14 @@@ int mlx5_devlink_eswitch_mode_get(struc if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_mode_to_devlink(esw->mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -2974,7 -2900,7 +2910,7 @@@ int mlx5_devlink_eswitch_inline_mode_se if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@@ -3013,7 -2939,7 +2949,7 @@@ }
esw->offloads.inline_mode = mlx5_mode; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0;
revert_inline_mode: @@@ -3023,7 -2949,7 +2959,7 @@@ vport, esw->offloads.inline_mode); out: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3036,14 -2962,14 +2972,14 @@@ int mlx5_devlink_eswitch_inline_mode_ge if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3059,7 -2985,7 +2995,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@@ -3105,7 -3031,7 +3041,7 @@@ }
unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3120,14 -3046,14 +3056,14 @@@ int mlx5_devlink_eswitch_encap_mode_get return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
*encap = esw->offloads.encap; unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 4b5e2f4f7160,b65b0cefc5b3..e8d73c8c890d --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@@ -33,6 -33,7 +33,7 @@@ #include <rdma/ib_verbs.h> #include <linux/mlx5/fs.h> #include "en.h" + #include "en/params.h" #include "ipoib.h"
#define IB_DEFAULT_Q_KEY 0xb1b @@@ -372,6 -373,7 +373,7 @@@ static void mlx5i_destroy_flow_steering static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err;
mlx5e_create_q_counters(priv); @@@ -386,7 -388,7 +388,7 @@@ if (err) goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts;
@@@ -394,7 -396,7 +396,7 @@@ if (err) goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs;
@@@ -405,11 -407,11 +407,11 @@@ return 0;
err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@@ -421,10 -423,12 +423,12 @@@ err_destroy_q_counters
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@@ -469,6 -473,7 +473,7 @@@ static const struct mlx5e_profile mlx5i .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5i_stats_grps, .stats_grps_num = mlx5i_stats_grps_num, + .rx_ptp_support = false, };
/* mlx5i netdev NDos */ @@@ -710,7 -715,7 +715,7 @@@ static void mlx5_rdma_netdev_free(struc
static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) { - return mdev->mlx5e_res.pdn != 0; + return mdev->mlx5e_res.hw_objs.pdn != 0; }
static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) @@@ -720,7 -725,7 +725,7 @@@ return &mlx5i_nic_profile; }
-static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, +static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, struct net_device *netdev, void *param) { struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param; diff --combined drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 7447c2a73cbd,37fb2e1fb278..dfea14399607 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@@ -19,7 -19,6 +19,6 @@@ #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ - #define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 @@@ -45,7 -44,6 +44,6 @@@ enum mlxsw_thermal_trips MLXSW_THERMAL_TEMP_TRIP_NORM, MLXSW_THERMAL_TEMP_TRIP_HIGH, MLXSW_THERMAL_TEMP_TRIP_HOT, - MLXSW_THERMAL_TEMP_TRIP_CRIT, };
struct mlxsw_thermal_trip { @@@ -75,16 -73,9 +73,9 @@@ static const struct mlxsw_thermal_trip { /* Warning */ .type = THERMAL_TRIP_HOT, .temp = MLXSW_THERMAL_ASIC_TEMP_HOT, - .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, }, - { /* Critical - soft poweroff */ - .type = THERMAL_TRIP_CRITICAL, - .temp = MLXSW_THERMAL_ASIC_TEMP_CRIT, - .min_state = MLXSW_THERMAL_MAX_STATE, - .max_state = MLXSW_THERMAL_MAX_STATE, - } };
#define MLXSW_THERMAL_NUM_TRIPS ARRAY_SIZE(default_thermal_trips) @@@ -141,7 -132,7 +132,7 @@@ static int mlxsw_get_cooling_device_idx /* Allow mlxsw thermal zone binding to an external cooling device */ for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) { if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i], - sizeof(cdev->type))) + strlen(cdev->type))) return 0; }
@@@ -154,7 -145,6 +145,6 @@@ mlxsw_thermal_module_trips_reset(struc tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0; }
static int @@@ -183,11 -173,10 +173,10 @@@ mlxsw_thermal_module_trips_update(struc }
/* According to the system thermal requirements, the thermal zones are - * defined with four trip points. The critical and emergency + * defined with three trip points. The critical and emergency * temperature thresholds, provided by QSFP module are set as "active" - * and "hot" trip points, "normal" and "critical" trip points are - * derived from "active" and "hot" by subtracting or adding double - * hysteresis value. + * and "hot" trip points, "normal" trip point is derived from "active" + * by subtracting double hysteresis value. */ if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT) tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp - @@@ -196,8 -185,6 +185,6 @@@ tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + - MLXSW_THERMAL_MODULE_TEMP_SHIFT;
return 0; } @@@ -210,7 -197,7 +197,7 @@@ static void mlxsw_thermal_tz_score_upda struct mlxsw_thermal_trip *trip = trips; unsigned int score, delta, i, shift = 1;
- /* Calculate thermal zone score, if temperature is above the critical + /* Calculate thermal zone score, if temperature is above the hot * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. */ score = MLXSW_THERMAL_TEMP_SCORE_MAX; @@@ -333,8 -320,7 +320,7 @@@ static int mlxsw_thermal_set_trip_temp( { struct mlxsw_thermal *thermal = tzdev->devdata;
- if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > MLXSW_THERMAL_ASIC_TEMP_CRIT) + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) return -EINVAL;
thermal->trips[trip].temp = temp; @@@ -502,8 -488,7 +488,7 @@@ mlxsw_thermal_module_trip_temp_set(stru { struct mlxsw_thermal_module *tz = tzdev->devdata;
- if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp) + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) return -EINVAL;
tz->trips[trip].temp = temp; diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum.h index ba28ac7e79bc,97d074d7b78d..f99db88ee884 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@@ -16,12 -16,12 +16,13 @@@ #include <linux/in6.h> #include <linux/notifier.h> #include <linux/net_namespace.h> + #include <linux/spinlock.h> #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> #include <net/vxlan.h> #include <net/flow_offload.h> +#include <net/inet_ecn.h>
#include "port.h" #include "core.h" @@@ -87,10 -87,15 +88,15 @@@ enum mlxsw_sp_rif_type MLXSW_SP_RIF_TYPE_MAX, };
- struct mlxsw_sp_rif_ops; + struct mlxsw_sp_router_ops;
- extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[]; - extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[]; + extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops; + extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops; + + struct mlxsw_sp_switchdev_ops; + + extern const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops; + extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops;
enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_8021Q, @@@ -134,6 -139,7 +140,7 @@@ struct mlxsw_sp_ptp_state struct mlxsw_sp_ptp_ops; struct mlxsw_sp_span_ops; struct mlxsw_sp_qdisc_state; + struct mlxsw_sp_mall_entry;
struct mlxsw_sp_port_mapping { u8 module; @@@ -149,6 -155,7 +156,7 @@@ struct mlxsw_sp const unsigned char *mac_mask; struct mlxsw_sp_upper *lags; struct mlxsw_sp_port_mapping **port_mapping; + struct rhashtable sample_trigger_ht; struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; @@@ -165,6 -172,7 +173,7 @@@ struct mlxsw_sp_counter_pool *counter_pool; struct mlxsw_sp_span *span; struct mlxsw_sp_trap *trap; + const struct mlxsw_sp_switchdev_ops *switchdev_ops; const struct mlxsw_sp_kvdl_ops *kvdl_ops; const struct mlxsw_afa_ops *afa_ops; const struct mlxsw_afk_ops *afk_ops; @@@ -172,7 -180,6 +181,6 @@@ const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; - const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_sb_ops *sb_ops; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; @@@ -180,6 -187,8 +188,8 @@@ const struct mlxsw_sp_span_ops *span_ops; const struct mlxsw_sp_policer_core_ops *policer_core_ops; const struct mlxsw_sp_trap_ops *trap_ops; + const struct mlxsw_sp_mall_ops *mall_ops; + const struct mlxsw_sp_router_ops *router_ops; const struct mlxsw_listener *listeners; size_t listeners_count; u32 lowest_shaper_bs; @@@ -233,7 -242,18 +243,18 @@@ struct mlxsw_sp_port_pcpu_stats u32 tx_dropped; };
- struct mlxsw_sp_port_sample { + enum mlxsw_sp_sample_trigger_type { + MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + + struct mlxsw_sp_sample_trigger { + enum mlxsw_sp_sample_trigger_type type; + u8 local_port; /* Reserved when trigger type is not ingress / egress. */ + }; + + struct mlxsw_sp_sample_params { struct psample_group *psample_group; u32 trunc_size; u32 rate; @@@ -303,7 -323,6 +324,6 @@@ struct mlxsw_sp_port struct mlxsw_sp_port_xstats xstats; struct delayed_work update_dw; } periodic_hw_stats; - struct mlxsw_sp_port_sample __rcu *sample; struct list_head vlans_list; struct mlxsw_sp_port_vlan *default_vlan; struct mlxsw_sp_qdisc_state *qdisc; @@@ -348,20 -367,6 +368,20 @@@ struct mlxsw_sp_port_type_speed_ops u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap); };
+static inline u8 mlxsw_sp_tunnel_ecn_decap(u8 outer_ecn, u8 inner_ecn, + bool *trap_en) +{ + bool set_ce = false; + + *trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + if (set_ce) + return INET_ECN_CE; + else if (outer_ecn == INET_ECN_ECT_1 && inner_ecn == INET_ECN_ECT_0) + return INET_ECN_ECT_1; + else + return inner_ecn; +} + static inline struct net_device * mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) { @@@ -546,6 -551,17 +566,17 @@@ void mlxsw_sp_hdroom_bufs_reset_sizes(s struct mlxsw_sp_hdroom *hdroom); int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, const struct mlxsw_sp_hdroom *hdroom); + struct mlxsw_sp_sample_params * + mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger); + int + mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params, + struct netlink_ext_ack *extack); + void + mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger);
extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; @@@ -583,8 -599,6 +614,6 @@@ void mlxsw_sp_rx_listener_no_mark_func( u8 local_port, void *priv); void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u8 local_port); - void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port); int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, @@@ -601,6 -615,8 +630,8 @@@ int mlxsw_sp_port_vp_mode_set(struct ml int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, bool learn_enable); int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type); + int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ethtype); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u16 ethtype); struct mlxsw_sp_port_vlan * @@@ -939,6 -955,12 +970,12 @@@ int mlxsw_sp_acl_rulei_act_count(struc int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u16 fid, struct netlink_ext_ack *extack); + int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, + struct netlink_ext_ack *extack);
struct mlxsw_sp_acl_rule;
@@@ -1048,6 -1070,19 +1085,19 @@@ extern const struct mlxsw_afk_ops mlxsw extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
/* spectrum_matchall.c */ + struct mlxsw_sp_mall_ops { + int (*sample_add)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack); + void (*sample_del)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry); + }; + + extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops; + extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops; + enum mlxsw_sp_mall_action_type { MLXSW_SP_MALL_ACTION_TYPE_MIRROR, MLXSW_SP_MALL_ACTION_TYPE_SAMPLE, @@@ -1063,6 -1098,11 +1113,11 @@@ struct mlxsw_sp_mall_trap_entry int span_id; };
+ struct mlxsw_sp_mall_sample_entry { + struct mlxsw_sp_sample_params params; + int span_id; /* Relevant for Spectrum-2 onwards. */ + }; + struct mlxsw_sp_mall_entry { struct list_head list; unsigned long cookie; @@@ -1072,7 -1112,7 +1127,7 @@@ union { struct mlxsw_sp_mall_mirror_entry mirror; struct mlxsw_sp_mall_trap_entry trap; - struct mlxsw_sp_port_sample sample; + struct mlxsw_sp_mall_sample_entry sample; }; struct rcu_head rcu; }; @@@ -1083,7 -1123,8 +1138,8 @@@ int mlxsw_sp_mall_replace(struct mlxsw_ void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block, struct tc_cls_matchall_offload *f); int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block, - struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block, struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index, diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 64a8f838eb53,b8b08a6a1d10..5facabd86882 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@@ -127,14 -127,16 +127,16 @@@ bool mlxsw_sp_l3addr_is_zero(union mlxs
static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry) + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); - char ratr_pl[MLXSW_REG_RATR_LEN]; + enum mlxsw_reg_ratr_op op;
- mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_IPIP, + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP, adj_index, rif_index); mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
@@@ -335,11 -337,12 +337,11 @@@ static int mlxsw_sp_ipip_ecn_decap_init u8 inner_ecn, u8 outer_ecn) { char tidem_pl[MLXSW_REG_TIDEM_LEN]; - bool trap_en, set_ce = false; u8 new_inner_ecn; + bool trap_en;
- trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); - new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; - + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); diff --combined drivers/net/ethernet/netronome/nfp/flower/main.h index 56833a41f3d2,e13e26e72ca0..31377923ea3d --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@@ -47,6 -47,7 +47,7 @@@ struct nfp_app #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) #define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_VLAN_QINQ BIT(8) + #define NFP_FL_FEATS_QOS_PPS BIT(9) #define NFP_FL_FEATS_HOST_ACK BIT(31)
#define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@@ -61,7 -62,8 +62,8 @@@ NFP_FL_FEATS_FLOW_MOD | \ NFP_FL_FEATS_PRE_TUN_RULES | \ NFP_FL_FEATS_IPV6_TUN | \ - NFP_FL_FEATS_VLAN_QINQ) + NFP_FL_FEATS_VLAN_QINQ | \ + NFP_FL_FEATS_QOS_PPS)
struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@@ -190,7 -192,6 +192,7 @@@ struct nfp_fl_internal_ports * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded + * @merge_table: Hash table to store merged flows */ struct nfp_flower_priv { struct nfp_app *app; @@@ -224,7 -225,6 +226,7 @@@ unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ int pre_tun_rule_cnt; + struct rhashtable merge_table; };
/** @@@ -352,12 -352,6 +354,12 @@@ struct nfp_fl_payload_link };
extern const struct rhashtable_params nfp_flower_table_params; +extern const struct rhashtable_params merge_table_params; + +struct nfp_merge_info { + u64 parent_ctx; + struct rhash_head ht_node; +};
struct nfp_fl_stats_frame { __be32 stats_con_id; diff --combined drivers/net/ethernet/realtek/r8169_main.c index 7ab4ea706839,1cd5c6f6d44f..a838187b97ac --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@@ -1586,12 -1586,10 +1586,10 @@@ DECLARE_RTL_COND(rtl_counters_cond
static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd) { - dma_addr_t paddr = tp->counters_phys_addr; - u32 cmd; + u32 cmd = lower_32_bits(tp->counters_phys_addr);
- RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32); + RTL_W32(tp, CounterAddrHigh, upper_32_bits(tp->counters_phys_addr)); rtl_pci_commit(tp); - cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(tp, CounterAddrLow, cmd); RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
@@@ -1903,6 -1901,15 +1901,15 @@@ static int rtl8169_set_eee(struct net_d return ret; }
+ static void rtl8169_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *data) + { + data->rx_max_pending = NUM_RX_DESC; + data->rx_pending = NUM_RX_DESC; + data->tx_max_pending = NUM_TX_DESC; + data->tx_pending = NUM_TX_DESC; + } + static const struct ethtool_ops rtl8169_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@@ -1923,6 -1930,7 +1930,7 @@@ .set_eee = rtl8169_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_ringparam = rtl8169_get_ringparam, };
static void rtl_enable_eee(struct rtl8169_private *tp) @@@ -2728,11 -2736,6 +2736,6 @@@ static void rtl_hw_start_8168c_2(struc __rtl_hw_start_8168cp(tp); }
- static void rtl_hw_start_8168c_3(struct rtl8169_private *tp) - { - rtl_hw_start_8168c_2(tp); - } - static void rtl_hw_start_8168c_4(struct rtl8169_private *tp) { rtl_set_def_aspm_entry_latency(tp); @@@ -3645,7 -3648,7 +3648,7 @@@ static void rtl_hw_config(struct rtl816 [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1, [RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1, [RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2, - [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3, + [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_2, [RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4, [RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2, [RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3, @@@ -4358,6 -4361,20 +4361,6 @@@ static void rtl8169_pcierr_interrupt(st if (net_ratelimit()) netdev_err(dev, "PCI error (cmd = 0x%04x, status_errs = 0x%04x)\n", pci_cmd, pci_status_errs); - /* - * The recovery sequence below admits a very elaborated explanation: - * - it seems to work; - * - I did not see what else could be done; - * - it makes iop3xx happy. - * - * Feel free to adjust to your needs. - */ - if (pdev->broken_parity_status) - pci_cmd &= ~PCI_COMMAND_PARITY; - else - pci_cmd |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY; - - pci_write_config_word(pdev, PCI_COMMAND, pci_cmd);
rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } diff --combined drivers/net/ethernet/smsc/smc91x.c index abd083efbfd7,cbde83f620a0..bc19db2dbafb --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@@ -2190,7 -2190,6 +2190,7 @@@ static const struct of_device_id smc91x }; MODULE_DEVICE_TABLE(of, smc91x_match);
+#if defined(CONFIG_GPIOLIB) /** * try_toggle_control_gpio - configure a gpio if it exists * @dev: net device @@@ -2205,7 -2204,7 +2205,7 @@@ static int try_toggle_control_gpio(stru const char *name, int index, int value, unsigned int nsdelay) { - struct gpio_desc *gpio = *desc; + struct gpio_desc *gpio; enum gpiod_flags flags = value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH;
gpio = devm_gpiod_get_index_optional(dev, name, index, flags); @@@ -2221,15 -2220,6 +2221,15 @@@
return 0; } +#else +static int try_toggle_control_gpio(struct device *dev, + struct gpio_desc **desc, + const char *name, int index, + int value, unsigned int nsdelay) +{ + return 0; +} +#endif #endif
/* diff --combined drivers/net/ethernet/xilinx/xilinx_axienet.h index aca7f82f6791,708769349f76..5b4d153b1492 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@@ -376,6 -376,8 +376,8 @@@ struct axidma_bd struct sk_buff *skb; } __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT);
+ #define XAE_NUM_MISC_CLOCKS 3 + /** * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. @@@ -385,7 -387,8 +387,8 @@@ * @phylink_config: phylink configuration settings * @pcs_phy: Reference to PCS/PMA PHY if used * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core - * @clk: Clock for AXI bus + * @axi_clk: AXI4-Lite bus clock + * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks) * @mii_bus: Pointer to MII bus structure * @mii_clk_div: MII bus clock divider value * @regs_start: Resource start for axienet device addresses @@@ -434,7 -437,8 +437,8 @@@ struct axienet_local
bool switch_x_sgmii;
- struct clk *clk; + struct clk *axi_clk; + struct clk_bulk_data misc_clks[XAE_NUM_MISC_CLOCKS];
struct mii_bus *mii_bus; u8 mii_clk_div; @@@ -504,18 -508,6 +508,18 @@@ static inline u32 axinet_ior_read_mcr(s return axienet_ior(lp, XAE_MDIO_MCR_OFFSET); }
+static inline void axienet_lock_mii(struct axienet_local *lp) +{ + if (lp->mii_bus) + mutex_lock(&lp->mii_bus->mdio_lock); +} + +static inline void axienet_unlock_mii(struct axienet_local *lp) +{ + if (lp->mii_bus) + mutex_unlock(&lp->mii_bus->mdio_lock); +} + /** * axienet_iow - Memory mapped Axi Ethernet register write * @lp: Pointer to axienet local structure diff --combined drivers/net/ethernet/xilinx/xilinx_axienet_main.c index f8f8654ea728,92cf9051d557..feb1aa4ec927 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@@ -1053,9 -1053,9 +1053,9 @@@ static int axienet_open(struct net_devi * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); ret = axienet_device_reset(ndev); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp);
ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); if (ret) { @@@ -1148,9 -1148,9 +1148,9 @@@ static int axienet_stop(struct net_devi }
/* Do a reset to ensure DMA is really stopped */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); __axienet_device_reset(lp); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp);
cancel_work_sync(&lp->dma_err_task);
@@@ -1709,9 -1709,9 +1709,9 @@@ static void axienet_dma_err_handler(str * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); __axienet_device_reset(lp); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp);
for (i = 0; i < lp->tx_bd_num; i++) { cur_p = &lp->tx_bd_v[i]; @@@ -1863,22 -1863,39 +1863,39 @@@ static int axienet_probe(struct platfor lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT;
- lp->clk = devm_clk_get_optional(&pdev->dev, NULL); - if (IS_ERR(lp->clk)) { - ret = PTR_ERR(lp->clk); + lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); + if (!lp->axi_clk) { + /* For backward compatibility, if named AXI clock is not present, + * treat the first clock specified as the AXI clock. + */ + lp->axi_clk = devm_clk_get_optional(&pdev->dev, NULL); + } + if (IS_ERR(lp->axi_clk)) { + ret = PTR_ERR(lp->axi_clk); goto free_netdev; } - ret = clk_prepare_enable(lp->clk); + ret = clk_prepare_enable(lp->axi_clk); if (ret) { - dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret); + dev_err(&pdev->dev, "Unable to enable AXI clock: %d\n", ret); goto free_netdev; }
+ lp->misc_clks[0].id = "axis_clk"; + lp->misc_clks[1].id = "ref_clk"; + lp->misc_clks[2].id = "mgt_clk"; + + ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS, lp->misc_clks); + if (ret) + goto cleanup_clk; + + ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + if (ret) + goto cleanup_clk; + /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); if (IS_ERR(lp->regs)) { - dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); ret = PTR_ERR(lp->regs); goto cleanup_clk; } @@@ -2109,7 -2126,8 +2126,8 @@@ cleanup_mdio of_node_put(lp->phy_node);
cleanup_clk: - clk_disable_unprepare(lp->clk); + clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + clk_disable_unprepare(lp->axi_clk);
free_netdev: free_netdev(ndev); @@@ -2132,7 -2150,8 +2150,8 @@@ static int axienet_remove(struct platfo
axienet_mdio_teardown(lp);
- clk_disable_unprepare(lp->clk); + clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + clk_disable_unprepare(lp->axi_clk);
of_node_put(lp->phy_node); lp->phy_node = NULL; diff --combined drivers/net/geneve.c index d5b1e48e0c09,5d7a2b1469f4..e3b2375ac5eb --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@@ -461,6 -461,7 +461,7 @@@ static struct socket *geneve_create_soc if (err < 0) return ERR_PTR(err);
+ udp_allow_gso(sock->sk); return sock; }
@@@ -908,16 -909,8 +909,16 @@@ static int geneve_xmit_skb(struct sk_bu
info = skb_tunnel_info(skb); if (info) { - info->key.u.ipv4.dst = fl4.saddr; - info->key.u.ipv4.src = fl4.daddr; + struct ip_tunnel_info *unclone; + + unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) { + dst_release(&rt->dst); + return -ENOMEM; + } + + unclone->key.u.ipv4.dst = fl4.saddr; + unclone->key.u.ipv4.src = fl4.daddr; }
if (!pskb_may_pull(skb, ETH_HLEN)) { @@@ -1001,16 -994,8 +1002,16 @@@ static int geneve6_xmit_skb(struct sk_b struct ip_tunnel_info *info = skb_tunnel_info(skb);
if (info) { - info->key.u.ipv6.dst = fl6.saddr; - info->key.u.ipv6.src = fl6.daddr; + struct ip_tunnel_info *unclone; + + unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) { + dst_release(dst); + return -ENOMEM; + } + + unclone->key.u.ipv6.dst = fl6.saddr; + unclone->key.u.ipv6.src = fl6.daddr; }
if (!pskb_may_pull(skb, ETH_HLEN)) { diff --combined drivers/net/tun.c index 4cf38be26dc9,6e55697315de..36443d506b67 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@@ -69,14 -69,6 +69,14 @@@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/mutex.h> +#include <linux/ieee802154.h> +#include <linux/if_ltalk.h> +#include <uapi/linux/if_fddi.h> +#include <uapi/linux/if_hippi.h> +#include <uapi/linux/if_fc.h> +#include <net/ax25.h> +#include <net/rose.h> +#include <net/6lowpan.h>
#include <linux/uaccess.h> #include <linux/proc_fs.h> @@@ -1189,8 -1181,7 +1189,7 @@@ static int tun_xdp_xmit(struct net_devi struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile; u32 numqueues; - int drops = 0; - int cnt = n; + int nxmit = 0; int i;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@@ -1220,9 -1211,9 +1219,9 @@@ resample
if (__ptr_ring_produce(&tfile->tx_ring, frame)) { atomic_long_inc(&dev->tx_dropped); - xdp_return_frame_rx_napi(xdp); - drops++; + break; } + nxmit++; } spin_unlock(&tfile->tx_ring.producer_lock);
@@@ -1230,17 -1221,21 +1229,21 @@@ __tun_xdp_flush_tfile(tfile);
rcu_read_unlock(); - return cnt - drops; + return nxmit; }
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) { struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); + int nxmit;
if (unlikely(!frame)) return -EOVERFLOW;
- return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH); + nxmit = tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH); + if (!nxmit) + xdp_return_frame_rx_napi(frame); + return nxmit; }
static const struct net_device_ops tap_netdev_ops = { @@@ -2927,45 -2922,6 +2930,45 @@@ static int tun_set_ebpf(struct tun_stru return __tun_set_ebpf(tun, prog_p, prog); }
+/* Return correct value for tun->dev->addr_len based on tun->dev->type. */ +static unsigned char tun_get_addr_len(unsigned short type) +{ + switch (type) { + case ARPHRD_IP6GRE: + case ARPHRD_TUNNEL6: + return sizeof(struct in6_addr); + case ARPHRD_IPGRE: + case ARPHRD_TUNNEL: + case ARPHRD_SIT: + return 4; + case ARPHRD_ETHER: + return ETH_ALEN; + case ARPHRD_IEEE802154: + case ARPHRD_IEEE802154_MONITOR: + return IEEE802154_EXTENDED_ADDR_LEN; + case ARPHRD_PHONET_PIPE: + case ARPHRD_PPP: + case ARPHRD_NONE: + return 0; + case ARPHRD_6LOWPAN: + return EUI64_ADDR_LEN; + case ARPHRD_FDDI: + return FDDI_K_ALEN; + case ARPHRD_HIPPI: + return HIPPI_ALEN; + case ARPHRD_IEEE802: + return FC_ALEN; + case ARPHRD_ROSE: + return ROSE_ADDR_LEN; + case ARPHRD_NETROM: + return AX25_ADDR_LEN; + case ARPHRD_LOCALTLK: + return LTALK_ALEN; + default: + return 0; + } +} + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@@ -3129,7 -3085,6 +3132,7 @@@ break; } tun->dev->type = (int) arg; + tun->dev->addr_len = tun_get_addr_len(tun->dev->type); netif_info(tun, drv, tun->dev, "linktype set to %d\n", tun->dev->type); call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, diff --combined drivers/net/virtio_net.c index 0824e6999e49,bb4ea9dbc16b..101659cd4b87 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@@ -195,6 -195,9 +195,9 @@@ struct virtnet_info /* # of XDP queue pairs currently used by the driver */ u16 xdp_queue_pairs;
+ /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ + bool xdp_enabled; + /* I like... big packets and I cannot lie! */ bool big_packets;
@@@ -406,13 -409,9 +409,13 @@@ static struct sk_buff *page_to_skb(stru offset += hdr_padded_len; p += hdr_padded_len;
- copy = len; - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); + /* Copy all frame if it fits skb->head, otherwise + * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. + */ + if (len <= skb_tailroom(skb)) + copy = len; + else + copy = ETH_HLEN + metasize; skb_put_data(skb, p, copy);
if (metasize) { @@@ -485,12 -484,41 +488,41 @@@ static int __virtnet_xdp_xmit_one(struc return 0; }
- static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi) - { - unsigned int qp; - - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - return &vi->sq[qp]; + /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on + * the current cpu, so it does not need to be locked. + * + * Here we use macro instead of inline functions because we have to deal with + * three issues at the same time: 1. the choice of sq. 2. judge and execute the + * lock/unlock of txq 3. make sparse happy. It is difficult for two inline + * functions to perfectly solve these three problems at the same time. + */ + #define virtnet_xdp_get_sq(vi) ({ \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + unsigned int qp; \ + \ + if (v->curr_queue_pairs > nr_cpu_ids) { \ + qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ + qp += smp_processor_id(); \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_acquire(txq); \ + } else { \ + qp = smp_processor_id() % v->curr_queue_pairs; \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_lock(txq, raw_smp_processor_id()); \ + } \ + v->sq + qp; \ + }) + + #define virtnet_xdp_put_sq(vi, q) { \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + \ + txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ + if (v->curr_queue_pairs > nr_cpu_ids) \ + __netif_tx_release(txq); \ + else \ + __netif_tx_unlock(txq); \ }
static int virtnet_xdp_xmit(struct net_device *dev, @@@ -503,10 -531,10 +535,10 @@@ unsigned int len; int packets = 0; int bytes = 0; - int drops = 0; + int nxmit = 0; int kicks = 0; - int ret, err; void *ptr; + int ret; int i;
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this @@@ -516,11 -544,10 +548,10 @@@ if (!xdp_prog) return -ENXIO;
- sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ret = -EINVAL; - drops = n; goto out; }
@@@ -543,13 -570,11 +574,11 @@@ for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i];
- err = __virtnet_xdp_xmit_one(vi, sq, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) + break; + nxmit++; } - ret = n - drops; + ret = nxmit;
if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) @@@ -560,16 -585,17 +589,17 @@@ out sq->stats.bytes += bytes; sq->stats.packets += packets; sq->stats.xdp_tx += n; - sq->stats.xdp_tx_drops += drops; + sq->stats.xdp_tx_drops += n - nxmit; sq->stats.kicks += kicks; u64_stats_update_end(&sq->stats.syncp);
+ virtnet_xdp_put_sq(vi, sq); return ret; }
static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { - return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; + return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; }
/* We copy the packet for XDP in the following cases: @@@ -713,7 -739,9 +743,9 @@@ static struct sk_buff *receive_small(st if (unlikely(!xdpf)) goto err_xdp; err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); - if (unlikely(err < 0)) { + if (unlikely(!err)) { + xdp_return_frame_rx_napi(xdpf); + } else if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; } @@@ -900,7 -928,9 +932,9 @@@ static struct sk_buff *receive_mergeabl if (unlikely(!xdpf)) goto err_xdp; err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); - if (unlikely(err < 0)) { + if (unlikely(!err)) { + xdp_return_frame_rx_napi(xdpf); + } else if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) put_page(xdp_page); @@@ -1462,12 -1492,13 +1496,13 @@@ static int virtnet_poll(struct napi_str xdp_do_flush();
if (xdp_xmit & VIRTIO_XDP_TX) { - sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi); if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); sq->stats.kicks++; u64_stats_update_end(&sq->stats.syncp); } + virtnet_xdp_put_sq(vi, sq); }
return received; @@@ -1985,7 -2016,7 +2020,7 @@@ static void virtnet_set_affinity(struc } virtqueue_set_affinity(vi->rq[i].vq, mask); virtqueue_set_affinity(vi->sq[i].vq, mask); - __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false); + __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); cpumask_clear(mask); }
@@@ -2108,25 -2139,21 +2143,21 @@@ static int virtnet_set_channels(struct static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct virtnet_info *vi = netdev_priv(dev); - char *p = (char *)data; unsigned int i, j; + u8 *p = data;
switch (stringset) { case ETH_SS_STATS: for (i = 0; i < vi->curr_queue_pairs; i++) { - for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s", - i, virtnet_rq_stats_desc[j].desc); - p += ETH_GSTRING_LEN; - } + for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) + ethtool_sprintf(&p, "rx_queue_%u_%s", i, + virtnet_rq_stats_desc[j].desc); }
for (i = 0; i < vi->curr_queue_pairs; i++) { - for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s", - i, virtnet_sq_stats_desc[j].desc); - p += ETH_GSTRING_LEN; - } + for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) + ethtool_sprintf(&p, "tx_queue_%u_%s", i, + virtnet_sq_stats_desc[j].desc); } break; } @@@ -2422,10 -2449,9 +2453,9 @@@ static int virtnet_xdp_set(struct net_d
/* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { - NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available"); - netdev_warn(dev, "request %i queues but max is %i\n", + netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", curr_qp + xdp_qp, vi->max_queue_pairs); - return -ENOMEM; + xdp_qp = 0; }
old_prog = rtnl_dereference(vi->rq[0].xdp_prog); @@@ -2459,11 -2485,14 +2489,14 @@@ vi->xdp_queue_pairs = xdp_qp;
if (prog) { + vi->xdp_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + } else { + vi->xdp_enabled = false; }
for (i = 0; i < vi->max_queue_pairs; i++) { @@@ -2531,7 -2560,7 +2564,7 @@@ static int virtnet_set_features(struct int err;
if ((dev->features ^ features) & NETIF_F_LRO) { - if (vi->xdp_queue_pairs) + if (vi->xdp_enabled) return -EBUSY;
if (features & NETIF_F_LRO) @@@ -2977,7 -3006,8 +3010,8 @@@ static int virtnet_probe(struct virtio_ return -ENOMEM;
/* Set up network device as normal. */ - dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; + dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | + IFF_TX_SKB_NO_LINEAR; dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA;
diff --combined drivers/net/vxlan.c index 53dbc67e8a34,39ee1300cdd9..02a14f1b938a --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@@ -2725,17 -2725,12 +2725,17 @@@ static void vxlan_xmit_one(struct sk_bu goto tx_error; } else if (err) { if (info) { + struct ip_tunnel_info *unclone; struct in_addr src, dst;
+ unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) + goto tx_error; + src = remote_ip.sin.sin_addr; dst = local_ip.sin.sin_addr; - info->key.u.ipv4.src = src.s_addr; - info->key.u.ipv4.dst = dst.s_addr; + unclone->key.u.ipv4.src = src.s_addr; + unclone->key.u.ipv4.dst = dst.s_addr; } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); dst_release(ndst); @@@ -2786,17 -2781,12 +2786,17 @@@ goto tx_error; } else if (err) { if (info) { + struct ip_tunnel_info *unclone; struct in6_addr src, dst;
+ unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) + goto tx_error; + src = remote_ip.sin6.sin6_addr; dst = local_ip.sin6.sin6_addr; - info->key.u.ipv6.src = src; - info->key.u.ipv6.dst = dst; + unclone->key.u.ipv6.src = src; + unclone->key.u.ipv6.dst = dst; }
vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); @@@ -3494,6 -3484,7 +3494,7 @@@ static struct socket *vxlan_create_sock if (err < 0) return ERR_PTR(err);
+ udp_allow_gso(sock->sk); return sock; }
@@@ -3713,6 -3704,7 +3714,7 @@@ static int vxlan_config_validate(struc #if IS_ENABLED(CONFIG_IPV6) if (use_ipv6) { struct inet6_dev *idev = __in6_dev_get(lowerdev); + if (idev && idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 support disabled by administrator"); diff --combined drivers/s390/net/qeth_core_main.c index 175b82b98f36,6954d4e831a3..a1f08e9aa064 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -369,7 -369,8 +369,7 @@@ static int qeth_cq_init(struct qeth_car QDIO_MAX_BUFFERS_PER_Q); card->qdio.c_q->next_buf_to_init = 127; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, - card->qdio.no_in_queues - 1, 0, - 127); + card->qdio.no_in_queues - 1, 0, 127, NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); goto out; @@@ -382,22 -383,48 +382,22 @@@ out
static int qeth_alloc_cq(struct qeth_card *card) { - int rc; - if (card->options.cq == QETH_CQ_ENABLED) { - int i; - struct qdio_outbuf_state *outbuf_states; - QETH_CARD_TEXT(card, 2, "cqon"); card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { - rc = -1; - goto kmsg_out; + dev_err(&card->gdev->dev, "Failed to create completion queue\n"); + return -ENOMEM; } + card->qdio.no_in_queues = 2; - card->qdio.out_bufstates = - kcalloc(card->qdio.no_out_queues * - QDIO_MAX_BUFFERS_PER_Q, - sizeof(struct qdio_outbuf_state), - GFP_KERNEL); - outbuf_states = card->qdio.out_bufstates; - if (outbuf_states == NULL) { - rc = -1; - goto free_cq_out; - } - for (i = 0; i < card->qdio.no_out_queues; ++i) { - card->qdio.out_qs[i]->bufstates = outbuf_states; - outbuf_states += QDIO_MAX_BUFFERS_PER_Q; - } } else { QETH_CARD_TEXT(card, 2, "nocq"); card->qdio.c_q = NULL; card->qdio.no_in_queues = 1; } QETH_CARD_TEXT_(card, 2, "iqc%d", card->qdio.no_in_queues); - rc = 0; -out: - return rc; -free_cq_out: - qeth_free_qdio_queue(card->qdio.c_q); - card->qdio.c_q = NULL; -kmsg_out: - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - goto out; + return 0; }
static void qeth_free_cq(struct qeth_card *card) @@@ -407,6 -434,8 +407,6 @@@ qeth_free_qdio_queue(card->qdio.c_q); card->qdio.c_q = NULL; } - kfree(card->qdio.out_bufstates); - card->qdio.out_bufstates = NULL; }
static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, @@@ -458,12 -487,12 +458,12 @@@ static void qeth_qdio_handle_aob(struc switch (atomic_xchg(&buffer->state, new_state)) { case QETH_QDIO_BUF_PRIMED: /* Faster than TX completion code, let it handle the async - * completion for us. + * completion for us. It will also recycle the QAOB. */ break; case QETH_QDIO_BUF_PENDING: /* TX completion code is active and will handle the async - * completion for us. + * completion for us. It will also recycle the QAOB. */ break; case QETH_QDIO_BUF_NEED_QAOB: @@@ -472,7 -501,7 +472,7 @@@ qeth_notify_skbs(buffer->q, buffer, notification);
/* Free dangling allocations. The attached skbs are handled by - * qeth_tx_complete_pending_bufs(). + * qeth_tx_complete_pending_bufs(), and so is the QAOB. */ for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); @@@ -491,6 -520,8 +491,6 @@@ default: WARN_ON_ONCE(1); } - - qdio_release_aob(aob); }
static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len, @@@ -1420,16 -1451,9 +1420,16 @@@ static void qeth_clear_output_buffer(st atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY); }
+static void qeth_free_out_buf(struct qeth_qdio_out_buffer *buf) +{ + if (buf->aob) + qdio_release_aob(buf->aob); + kmem_cache_free(qeth_qdio_outbuf_cache, buf); +} + static void qeth_tx_complete_pending_bufs(struct qeth_card *card, struct qeth_qdio_out_q *queue, - bool drain) + bool drain, int budget) { struct qeth_qdio_out_buffer *buf, *tmp;
@@@ -1441,10 -1465,10 +1441,10 @@@ if (drain) qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); - qeth_tx_complete_buf(buf, drain, 0); + qeth_tx_complete_buf(buf, drain, budget);
list_del(&buf->list_entry); - kmem_cache_free(qeth_qdio_outbuf_cache, buf); + qeth_free_out_buf(buf); } } } @@@ -1453,7 -1477,7 +1453,7 @@@ static void qeth_drain_output_queue(str { int j;
- qeth_tx_complete_pending_bufs(q->card, q, true); + qeth_tx_complete_pending_bufs(q->card, q, true, 0);
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (!q->bufs[j]) @@@ -1461,7 -1485,7 +1461,7 @@@
qeth_clear_output_buffer(q, q->bufs[j], true, 0); if (free) { - kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]); + qeth_free_out_buf(q->bufs[j]); q->bufs[j] = NULL; } } @@@ -2566,11 -2590,12 +2566,12 @@@ static int qeth_ulp_setup(struct qeth_c return qeth_send_control_data(card, iob, qeth_ulp_setup_cb, NULL); }
- static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) + static int qeth_alloc_out_buf(struct qeth_qdio_out_q *q, unsigned int bidx, + gfp_t gfp) { struct qeth_qdio_out_buffer *newbuf;
- newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC); + newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, gfp); if (!newbuf) return -ENOMEM;
@@@ -2605,7 -2630,7 +2606,7 @@@ static struct qeth_qdio_out_q *qeth_all goto err_qdio_bufs;
for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { - if (qeth_init_qdio_out_buf(q, i)) + if (qeth_alloc_out_buf(q, i, GFP_KERNEL)) goto err_out_bufs; }
@@@ -2613,7 -2638,7 +2614,7 @@@
err_out_bufs: while (i > 0) - kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]); + qeth_free_out_buf(q->bufs[--i]); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); err_qdio_bufs: kfree(q); @@@ -3000,8 -3025,7 +3001,8 @@@ static int qeth_init_qdio_queues(struc }
card->qdio.in_q->next_buf_to_init = QDIO_BUFNR(rx_bufs); - rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs); + rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs, + NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); return rc; @@@ -3493,7 -3517,7 +3494,7 @@@ static unsigned int qeth_rx_refill_queu }
rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, - queue->next_buf_to_init, count); + queue->next_buf_to_init, count, NULL); if (rc) { QETH_CARD_TEXT(card, 2, "qinberr"); } @@@ -3602,7 -3626,6 +3603,7 @@@ static void qeth_flush_buffers(struct q struct qeth_qdio_out_buffer *buf = queue->bufs[index]; unsigned int qdio_flags = QDIO_FLAG_SYNC_OUTPUT; struct qeth_card *card = queue->card; + struct qaob *aob = NULL; int rc; int i;
@@@ -3615,24 -3638,16 +3616,24 @@@ SBAL_EFLAGS_LAST_ENTRY; queue->coalesced_frames += buf->frames;
- if (queue->bufstates) - queue->bufstates[bidx].user = buf; - if (IS_IQD(card)) { skb_queue_walk(&buf->skb_list, skb) skb_tx_timestamp(skb); } }
- if (!IS_IQD(card)) { + if (IS_IQD(card)) { + if (card->options.cq == QETH_CQ_ENABLED && + !qeth_iqd_is_mcast_queue(card, queue) && + count == 1) { + if (!buf->aob) + buf->aob = qdio_allocate_aob(); + if (buf->aob) { + aob = buf->aob; + aob->user1 = (u64) buf; + } + } + } else { if (!queue->do_pack) { if ((atomic_read(&queue->used_buffers) >= (QETH_HIGH_WATERMARK_PACK - @@@ -3663,8 -3678,8 +3664,8 @@@ }
QETH_TXQ_STAT_INC(queue, doorbell); - rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, - queue->queue_no, index, count); + rc = do_QDIO(CARD_DDEV(card), qdio_flags, queue->queue_no, index, count, + aob);
switch (rc) { case 0: @@@ -3800,7 -3815,8 +3801,7 @@@ static void qeth_qdio_cq_handler(struc qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER); } rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue, - card->qdio.c_q->next_buf_to_init, - count); + cq->next_buf_to_init, count, NULL); if (rc) { dev_warn(&card->gdev->dev, "QDIO reported an error, rc=%i\n", rc); @@@ -5255,6 -5271,7 +5256,6 @@@ static int qeth_qdio_establish(struct q init_data.int_parm = (unsigned long) card; init_data.input_sbal_addr_array = in_sbal_ptrs; init_data.output_sbal_addr_array = out_sbal_ptrs; - init_data.output_sbal_state_array = card->qdio.out_bufstates; init_data.scan_threshold = IS_IQD(card) ? 0 : 32;
if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED, @@@ -6053,15 -6070,7 +6054,15 @@@ static void qeth_iqd_tx_complete(struc bool error = !!qdio_error;
if (qdio_error == QDIO_ERROR_SLSB_PENDING) { - WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); + struct qaob *aob = buffer->aob; + + if (!aob) { + netdev_WARN_ONCE(card->dev, + "Pending TX buffer %#x without QAOB on TX queue %u\n", + bidx, queue->queue_no); + qeth_schedule_recovery(card); + return; + }
QETH_CARD_TEXT_(card, 5, "pel%u", bidx);
@@@ -6080,7 -6089,8 +6081,8 @@@
/* Prepare the queue slot for immediate re-use: */ qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements); - if (qeth_init_qdio_out_buf(queue, bidx)) { + if (qeth_alloc_out_buf(queue, bidx, + GFP_ATOMIC)) { QETH_CARD_TEXT(card, 2, "outofbuf"); qeth_schedule_recovery(card); } @@@ -6117,8 -6127,6 +6119,8 @@@ default: WARN_ON_ONCE(1); } + + memset(aob, 0, sizeof(*aob)); } else if (card->options.cq == QETH_CQ_ENABLED) { qeth_notify_skbs(queue, buffer, qeth_compute_cq_notification(sflags, 0)); @@@ -6144,7 -6152,7 +6146,7 @@@ static int qeth_tx_poll(struct napi_str unsigned int bytes = 0; int completed;
- qeth_tx_complete_pending_bufs(card, queue, false); + qeth_tx_complete_pending_bufs(card, queue, false, budget);
if (qeth_out_queue_is_empty(queue)) { napi_complete(napi); diff --combined include/linux/avf/virtchnl.h index 532bcbfc4716,47482049f640..40dd6afbfd81 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@@ -136,6 -136,9 +136,9 @@@ enum virtchnl_ops VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, + /* opcode 34 - 46 are reserved */ + VIRTCHNL_OP_ADD_FDIR_FILTER = 47, + VIRTCHNL_OP_DEL_FDIR_FILTER = 48, };
/* These macros are used to generate compilation errors if a structure/union @@@ -247,6 -250,7 +250,7 @@@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_ #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 + #define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000
/* Define below the capability flags that are not offloads */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 @@@ -476,6 -480,7 +480,6 @@@ struct virtchnl_rss_key u16 vsi_id; u16 key_len; u8 key[1]; /* RSS hash key, packed bytes */ - u8 pad[1]; };
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); @@@ -484,6 -489,7 +488,6 @@@ struct virtchnl_rss_lut u16 vsi_id; u16 lut_entries; u8 lut[1]; /* RSS lookup table */ - u8 pad[1]; };
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); @@@ -557,6 -563,11 +561,11 @@@ enum virtchnl_action /* action types */ VIRTCHNL_ACTION_DROP = 0, VIRTCHNL_ACTION_TC_REDIRECT, + VIRTCHNL_ACTION_PASSTHRU, + VIRTCHNL_ACTION_QUEUE, + VIRTCHNL_ACTION_Q_REGION, + VIRTCHNL_ACTION_MARK, + VIRTCHNL_ACTION_COUNT, };
enum virtchnl_flow_type { @@@ -666,6 -677,269 +675,269 @@@ enum virtchnl_vfr_states VIRTCHNL_VFR_VFACTIVE, };
+ #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 + #define PROTO_HDR_SHIFT 5 + #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) + #define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) + + /* VF use these macros to configure each protocol header. + * Specify which protocol headers and protocol header fields base on + * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field. + * @param hdr: a struct of virtchnl_proto_hdr + * @param hdr_type: ETH/IPV4/TCP, etc + * @param field: SRC/DST/TEID/SPI, etc + */ + #define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK)) + #define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK)) + #define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \ + ((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK)) + #define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr) ((hdr)->field_selector) + + #define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) + #define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) + + #define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \ + ((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type) + #define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \ + (((hdr)->type) >> PROTO_HDR_SHIFT) + #define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \ + ((hdr)->type == ((val) >> PROTO_HDR_SHIFT)) + #define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \ + (VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \ + VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val))) + + /* Protocol header type within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. 
Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization. + */ + enum virtchnl_proto_hdr_type { + VIRTCHNL_PROTO_HDR_NONE, + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_S_VLAN, + VIRTCHNL_PROTO_HDR_C_VLAN, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_TCP, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_SCTP, + VIRTCHNL_PROTO_HDR_GTPU_IP, + VIRTCHNL_PROTO_HDR_GTPU_EH, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + VIRTCHNL_PROTO_HDR_PPPOE, + VIRTCHNL_PROTO_HDR_L2TPV3, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_AH, + VIRTCHNL_PROTO_HDR_PFCP, + }; + + /* Protocol header field within a protocol header. */ + enum virtchnl_proto_hdr_field { + /* ETHER */ + VIRTCHNL_PROTO_HDR_ETH_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH), + VIRTCHNL_PROTO_HDR_ETH_DST, + VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, + /* S-VLAN */ + VIRTCHNL_PROTO_HDR_S_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN), + /* C-VLAN */ + VIRTCHNL_PROTO_HDR_C_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN), + /* IPV4 */ + VIRTCHNL_PROTO_HDR_IPV4_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4), + VIRTCHNL_PROTO_HDR_IPV4_DST, + VIRTCHNL_PROTO_HDR_IPV4_DSCP, + VIRTCHNL_PROTO_HDR_IPV4_TTL, + VIRTCHNL_PROTO_HDR_IPV4_PROT, + /* IPV6 */ + VIRTCHNL_PROTO_HDR_IPV6_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6), + VIRTCHNL_PROTO_HDR_IPV6_DST, + VIRTCHNL_PROTO_HDR_IPV6_TC, + VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, + VIRTCHNL_PROTO_HDR_IPV6_PROT, + /* TCP */ + VIRTCHNL_PROTO_HDR_TCP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP), + VIRTCHNL_PROTO_HDR_TCP_DST_PORT, + /* UDP */ + VIRTCHNL_PROTO_HDR_UDP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP), + VIRTCHNL_PROTO_HDR_UDP_DST_PORT, + /* SCTP */ + VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP), + 
VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, + /* GTPU_IP */ + VIRTCHNL_PROTO_HDR_GTPU_IP_TEID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP), + /* GTPU_EH */ + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH), + VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, + /* PPPOE */ + VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE), + /* L2TPV3 */ + VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3), + /* ESP */ + VIRTCHNL_PROTO_HDR_ESP_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP), + /* AH */ + VIRTCHNL_PROTO_HDR_AH_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH), + /* PFCP */ + VIRTCHNL_PROTO_HDR_PFCP_S_FIELD = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP), + VIRTCHNL_PROTO_HDR_PFCP_SEID, + }; + + struct virtchnl_proto_hdr { + enum virtchnl_proto_hdr_type type; + u32 field_selector; /* a bit mask to select field for header type */ + u8 buffer[64]; + /** + * binary buffer in network order for specific header type. + * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4 + * header is expected to be copied into the buffer. + */ + }; + + VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr); + + struct virtchnl_proto_hdrs { + u8 tunnel_level; + /** + * specify where protocol header start from. + * 0 - from the outer layer + * 1 - from the first inner layer + * 2 - from the second inner layer + * .... 
+ **/ + int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + }; + + VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); + + /* action configuration for FDIR */ + struct virtchnl_filter_action { + enum virtchnl_action type; + union { + /* used for queue and qgroup action */ + struct { + u16 index; + u8 region; + } queue; + /* used for count action */ + struct { + /* share counter ID with other flow rules */ + u8 shared; + u32 id; /* counter ID */ + } count; + /* used for mark action */ + u32 mark_id; + u8 reserve[32]; + } act_conf; + }; + + VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); + + #define VIRTCHNL_MAX_NUM_ACTIONS 8 + + struct virtchnl_filter_action_set { + /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ + int count; + struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; + }; + + VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set); + + /* pattern and action for FDIR rule */ + struct virtchnl_fdir_rule { + struct virtchnl_proto_hdrs proto_hdrs; + struct virtchnl_filter_action_set action_set; + }; + + VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule); + + /* Status returned to VF after VF requests FDIR commands + * VIRTCHNL_FDIR_SUCCESS + * VF FDIR related request is successfully done by PF + * The request can be OP_ADD/DEL. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE + * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource. + * + * VIRTCHNL_FDIR_FAILURE_RULE_EXIST + * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed. + * + * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT + * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST + * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist. + * + * VIRTCHNL_FDIR_FAILURE_RULE_INVALID + * OP_ADD_FDIR_FILTER request is failed due to parameters validation + * or HW doesn't support. 
+ * + * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT + * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out + * for programming. + */ + enum virtchnl_fdir_prgm_status { + VIRTCHNL_FDIR_SUCCESS = 0, + VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE, + VIRTCHNL_FDIR_FAILURE_RULE_EXIST, + VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT, + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST, + VIRTCHNL_FDIR_FAILURE_RULE_INVALID, + VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT, + }; + + /* VIRTCHNL_OP_ADD_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id, + * validate_only and rule_cfg. PF will return flow_id + * if the request is successfully done and return add_status to VF. + */ + struct virtchnl_fdir_add { + u16 vsi_id; /* INPUT */ + /* + * 1 for validating a fdir rule, 0 for creating a fdir rule. + * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER. + */ + u16 validate_only; /* INPUT */ + u32 flow_id; /* OUTPUT */ + struct virtchnl_fdir_rule rule_cfg; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ + }; + + VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add); + + /* VIRTCHNL_OP_DEL_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id + * and flow_id. PF will return del_status to VF. + */ + struct virtchnl_fdir_del { + u16 vsi_id; /* INPUT */ + u16 pad; + u32 flow_id; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ + }; + + VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); + /** * virtchnl_vc_validate_vf_msg * @ver: Virtchnl version info @@@ -826,6 -1100,12 +1098,12 @@@ virtchnl_vc_validate_vf_msg(struct virt case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_add); + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_del); + break; /* These are always errors coming from the VF. 
*/ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --combined include/linux/bpf.h index fdac0534ce79,9fdd839b418c..c9b7a876b0c8 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -40,7 -40,7 +40,8 @@@ struct bpf_local_storage struct bpf_local_storage_map; struct kobject; struct mem_cgroup; +struct module; + struct bpf_func_state;
extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@@ -56,7 -56,7 +57,7 @@@ struct bpf_iter_seq_info u32 seq_priv_size; };
- /* map is generic key/value storage optionally accesible by eBPF programs */ + /* map is generic key/value storage optionally accessible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ int (*map_alloc_check)(union bpf_attr *attr); @@@ -119,6 -119,9 +120,9 @@@ void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+ /* Misc helpers.*/ + int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. @@@ -131,6 -134,13 +135,13 @@@ bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1);
+ + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@@ -297,6 -307,8 +308,8 @@@ enum bpf_arg_type ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, };
@@@ -413,6 -425,9 +426,9 @@@ enum bpf_reg_type PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ + __BPF_REG_TYPE_MAX, };
/* The information passed from prog-specific *_is_valid_access @@@ -466,6 -481,7 +482,7 @@@ struct bpf_verifier_ops const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); + bool (*check_kfunc_call)(u32 kfunc_btf_id); };
struct bpf_prog_offload_ops { @@@ -508,6 -524,11 +525,11 @@@ enum bpf_cgroup_storage_type */ #define MAX_BPF_FUNC_ARGS 12
+ /* The maximum number of arguments passed through registers + * a single function may have. + */ + #define MAX_BPF_FUNC_REG_ARGS 5 + struct btf_func_model { u8 ret_size; u8 nr_args; @@@ -624,7 -645,6 +646,7 @@@ struct bpf_trampoline /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; u64 selector; + struct module *mod; };
struct bpf_attach_target_info { @@@ -778,6 -798,8 +800,8 @@@ struct btf_mod_pair struct module *module; };
+ struct bpf_kfunc_desc_tab; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@@ -814,6 -836,7 +838,7 @@@ struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; + struct bpf_kfunc_desc_tab *kfunc_tab; u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; @@@ -1088,6 -1111,13 +1113,13 @@@ int bpf_prog_array_copy(struct bpf_prog /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0)
+ /* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, + * if bpf_cgroup_storage_set() failed, the rest of programs + * will not execute. This should be a really rare scenario + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of + * preemptions all between bpf_cgroup_storage_set() and + * bpf_cgroup_storage_unset() on the same cpu. + */ #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ ({ \ struct bpf_prog_array_item *_item; \ @@@ -1100,10 -1130,12 +1132,12 @@@ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ func_ret = func(_prog, ctx); \ _ret &= (func_ret & 1); \ *(ret_flags) |= (func_ret >> 1); \ + bpf_cgroup_storage_unset(); \ _item++; \ } \ rcu_read_unlock(); \ @@@ -1124,9 -1156,14 +1158,14 @@@ goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - if (set_cg_storage) \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ - _ret &= func(_prog, ctx); \ + if (!set_cg_storage) { \ + _ret &= func(_prog, ctx); \ + } else { \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ + _ret &= func(_prog, ctx); \ + bpf_cgroup_storage_unset(); \ + } \ _item++; \ } \ _out: \ @@@ -1399,6 -1436,10 +1438,10 @@@ void bpf_iter_map_show_fdinfo(const str int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info);
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@@ -1448,9 -1489,9 +1491,9 @@@ struct btf *bpf_get_btf_vmlinux(void) /* Map specifics */ struct xdp_buff; struct sk_buff; + struct bpf_dtab_netdev; + struct bpf_cpu_map_entry;
- struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); - struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_flush(void); int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1460,7 -1501,6 +1503,6 @@@ int dev_map_generic_redirect(struct bpf struct bpf_prog *xdp_prog); bool dev_map_can_have_prog(struct bpf_map *map);
- struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1489,6 -1529,10 +1531,10 @@@ int bpf_prog_test_run_flow_dissector(st int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); + int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); + bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@@ -1507,8 -1551,11 +1553,11 @@@ int btf_distill_func_proto(struct bpf_v struct btf_func_model *m);
struct bpf_reg_state; - int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); + int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); + int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@@ -1518,6 -1565,11 +1567,11 @@@ struct bpf_prog *bpf_prog_by_id(u32 id) struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); + void bpf_task_storage_free(struct task_struct *task); + bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); + const struct btf_func_model * + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@@ -1587,17 -1639,6 +1641,6 @@@ static inline int bpf_obj_get_user(cons return -EOPNOTSUPP; }
- static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, - u32 key) - { - return NULL; - } - - static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, - u32 key) - { - return NULL; - } static inline bool dev_map_can_have_prog(struct bpf_map *map) { return false; @@@ -1609,6 -1650,7 +1652,7 @@@ static inline void __dev_flush(void
struct xdp_buff; struct bpf_dtab_netdev; + struct bpf_cpu_map_entry;
static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, @@@ -1633,12 -1675,6 +1677,6 @@@ static inline int dev_map_generic_redir return 0; }
- static inline - struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) - { - return NULL; - } - static inline void __cpu_map_flush(void) { } @@@ -1689,6 -1725,18 +1727,18 @@@ static inline int bpf_prog_test_run_flo return -ENOTSUPP; }
+ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) + { + return -ENOTSUPP; + } + + static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) + { + return false; + } + static inline void bpf_map_put(struct bpf_map *map) { } @@@ -1703,6 -1751,22 +1753,22 @@@ bpf_base_func_proto(enum bpf_func_id fu { return NULL; } + + static inline void bpf_task_storage_free(struct task_struct *task) + { + } + + static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) + { + return false; + } + + static inline const struct btf_func_model * + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn) + { + return NULL; + } #endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@@ -1787,22 -1851,24 +1853,24 @@@ static inline void bpf_map_offload_map_ } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
- #if defined(CONFIG_BPF_STREAM_PARSER) - int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); + + void bpf_sk_reuseport_detach(struct sock *sk); + int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); + int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); #else - static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, - struct bpf_prog *old, u32 which) + static inline void bpf_sk_reuseport_detach(struct sock *sk) { - return -EOPNOTSUPP; }
+ #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { @@@ -1820,20 -1886,7 +1888,7 @@@ static inline int sock_map_update_elem_ { return -EOPNOTSUPP; } - #endif /* CONFIG_BPF_STREAM_PARSER */
- #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) - void bpf_sk_reuseport_detach(struct sock *sk); - int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); - int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); - #else - static inline void bpf_sk_reuseport_detach(struct sock *sk) - { - } - - #ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@@ -1905,6 -1958,9 +1960,9 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; + extern const struct bpf_func_proto bpf_task_storage_get_proto; + extern const struct bpf_func_proto bpf_task_storage_delete_proto; + extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --combined include/linux/ethtool.h index cdca84e6dd6b,5c631a298994..47852da67263 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@@ -87,7 -87,9 +87,7 @@@ u32 ethtool_op_get_link(struct net_devi int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti);
-/** - * struct ethtool_link_ext_state_info - link extended state and substate. - */ +/* Link extended state and substate. */ struct ethtool_link_ext_state_info { enum ethtool_link_ext_state link_ext_state; union { @@@ -127,6 -129,7 +127,6 @@@ struct ethtool_link_ksettings __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; u32 lanes; - enum ethtool_link_mode_bit_indices link_mode; };
/** @@@ -289,9 -292,6 +289,9 @@@ struct ethtool_pause_stats * do not attach ext_substate attribute to netlink message). If link_ext_state * and link_ext_substate are unknown, return -ENODATA. If not implemented, * link_ext_state and link_ext_substate will not be sent to userspace. + * @get_eeprom_len: Read range of EEPROM addresses for validation of + * @get_eeprom and @set_eeprom requests. + * Returns 0 if device does not support EEPROM access. * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values @@@ -384,8 -384,6 +384,8 @@@ * @get_module_eeprom: Get the eeprom information from the plug-in module * @get_eee: Get Energy-Efficient (EEE) supported and status. * @set_eee: Set EEE status (enable/disable) as well as LPI timers. + * @get_tunable: Read the value of a driver / device tunable. + * @set_tunable: Set the value of a driver / device tunable. * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue. * It must check that the given queue number is valid. If neither a RX nor * a TX queue has this number, return -EINVAL. If only a RX queue or a TX @@@ -412,6 -410,8 +412,8 @@@ * @get_ethtool_phy_stats: Return extended statistics about the PHY device. * This is only useful if the device maintains PHY statistics and * cannot use the standard PHY library helpers. + * @get_phy_tunable: Read the value of a PHY tunable. + * @set_phy_tunable: Set the value of a PHY tunable. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@@ -549,8 -549,8 +551,8 @@@ struct phy_tdr_config * @get_sset_count: Get number of strings that @get_strings will write. * @get_strings: Return a set of strings that describe the requested objects * @get_stats: Return extended statistics about the PHY device. 
- * @start_cable_test - Start a cable test - * @start_cable_test_tdr - Start a Time Domain Reflectometry cable test + * @start_cable_test: Start a cable test + * @start_cable_test_tdr: Start a Time Domain Reflectometry cable test * * All operations are optional (i.e. the function pointer may be set to %NULL) * and callers must take this into account. Callers must hold the RTNL lock. @@@ -573,12 -573,13 +575,22 @@@ struct ethtool_phy_ops */ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
+/* + * ethtool_params_from_link_mode - Derive link parameters from a given link mode + * @link_ksettings: Link parameters to be derived from the link mode + * @link_mode: Link mode + */ +void +ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, + enum ethtool_link_mode_bit_indices link_mode); ++ + /** + * ethtool_sprintf - Write formatted string to ethtool string data + * @data: Pointer to start of string to update + * @fmt: Format of string to write + * + * Write formatted string to data. Update data to point at start of + * next string. + */ + extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); #endif /* _LINUX_ETHTOOL_H */ diff --combined include/linux/mlx5/driver.h index ab07f09f2bad,baf38b5a2a8c..eb8fbd94e1c6 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -517,8 -517,8 +517,8 @@@ struct mlx5_rate_limit
struct mlx5_rl_entry { u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)]; - u16 index; u64 refcount; + u16 index; u16 uid; u8 dedicated : 1; }; @@@ -530,6 -530,7 +530,7 @@@ struct mlx5_rl_table u32 max_rate; u32 min_rate; struct mlx5_rl_entry *rl_entry; + u64 refcount; };
struct mlx5_core_roce { @@@ -644,10 -645,14 +645,14 @@@ struct mlx5_td };
struct mlx5e_resources { - u32 pdn; - struct mlx5_td td; - struct mlx5_core_mkey mkey; - struct mlx5_sq_bfreg bfreg; + struct mlx5e_hw_objs { + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; + } hw_objs; + struct devlink_port dl_port; + struct net_device *uplink_netdev; };
enum mlx5_sw_icm_type { @@@ -1226,7 -1231,7 +1231,7 @@@ enum MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, };
-static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; diff --combined include/net/sock.h index 8487f58da36d,8b4155e756c2..cadcc12cc316 --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -934,13 -934,9 +934,13 @@@ static inline void sk_acceptq_added(str WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); }
+/* Note: If you think the test should be: + * return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog); + * Then please take a look at commit 64a146513f8f ("[NET]: Revert incorrect accept queue backlog changes.") + */ static inline bool sk_acceptq_is_full(const struct sock *sk) { - return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog); + return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); }
/* @@@ -1188,6 -1184,9 +1188,9 @@@ struct proto void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + #ifdef CONFIG_BPF_SYSCALL + int (*psock_update_sk_prot)(struct sock *sk, bool restore); + #endif
/* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@@ -2225,15 -2224,6 +2228,15 @@@ static inline void skb_set_owner_r(stru sk_mem_charge(sk, skb->truesize); }
+static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) +{ + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { + skb_orphan(skb); + skb->destructor = sock_efree; + skb->sk = sk; + } +} + void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires);
diff --combined include/uapi/linux/ethtool.h index 5afea692a3f7,868b513d4f54..f91e079e3108 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@@ -26,14 -26,6 +26,14 @@@ * have the same layout for 32-bit and 64-bit userland. */
+/* Note on reserved space. + * Reserved fields must not be accessed directly by user space because + * they may be replaced by a different field in the future. They must + * be initialized to zero before making the request, e.g. via memset + * of the entire structure or implicitly by not being set in a structure + * initializer. + */ + /** * struct ethtool_cmd - DEPRECATED, link control and status * This structure is DEPRECATED, please use struct ethtool_link_settings. @@@ -75,7 -67,6 +75,7 @@@ * and other link features that the link partner advertised * through autonegotiation; 0 if unknown or not applicable. * Read-only. + * @reserved: Reserved for future use; see the note on reserved space. * * The link speed in Mbps is split between @speed and @speed_hi. Use * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to @@@ -164,7 -155,6 +164,7 @@@ static inline __u32 ethtool_cmd_speed(c * @bus_info: Device bus address. This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. + * @reserved2: Reserved for future use; see the note on reserved space. * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and * %ETHTOOL_SPFLAGS commands; also the number of strings in the * %ETH_SS_PRIV_FLAGS set @@@ -366,7 -356,6 +366,7 @@@ struct ethtool_eeprom * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting * its tx lpi (after reaching 'idle' state). Effective only when eee * was negotiated and tx_lpi_enabled was set. + * @reserved: Reserved for future use; see the note on reserved space. */ struct ethtool_eee { __u32 cmd; @@@ -385,7 -374,6 +385,7 @@@ * @cmd: %ETHTOOL_GMODULEINFO * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx * @eeprom_len: Length of the eeprom + * @reserved: Reserved for future use; see the note on reserved space. 
* * This structure is used to return the information to * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. @@@ -591,7 -579,9 +591,7 @@@ struct ethtool_pauseparam __u32 tx_pause; };
-/** - * enum ethtool_link_ext_state - link extended state - */ +/* Link extended state */ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_AUTONEG, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, @@@ -605,7 -595,10 +605,7 @@@ ETHTOOL_LINK_EXT_STATE_OVERHEAT, };
-/** - * enum ethtool_link_ext_substate_autoneg - more information in addition to - * ETHTOOL_LINK_EXT_STATE_AUTONEG. - */ +/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ enum ethtool_link_ext_substate_autoneg { ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, @@@ -615,7 -608,9 +615,7 @@@ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, };
-/** - * enum ethtool_link_ext_substate_link_training - more information in addition to - * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. */ enum ethtool_link_ext_substate_link_training { ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, @@@ -624,7 -619,9 +624,7 @@@ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, };
-/** - * enum ethtool_link_ext_substate_logical_mismatch - more information in addition - * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. */ enum ethtool_link_ext_substate_link_logical_mismatch { ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, @@@ -634,14 -631,19 +634,14 @@@ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, };
-/** - * enum ethtool_link_ext_substate_bad_signal_integrity - more information in - * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. */ enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, };
-/** - * enum ethtool_link_ext_substate_cable_issue - more information in - * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. - */ +/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ enum ethtool_link_ext_substate_cable_issue { ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, @@@ -659,7 -661,6 +659,7 @@@ * now deprecated * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_TUNABLES: tunable names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS * @ETH_SS_PHY_TUNABLES: PHY tunable names * @ETH_SS_LINK_MODES: link mode names @@@ -669,8 -670,6 +669,8 @@@ * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types + * + * @ETH_SS_COUNT: number of defined string sets */ enum ethtool_stringset { ETH_SS_TEST = 0, @@@ -716,7 -715,6 +716,7 @@@ struct ethtool_gstrings /** * struct ethtool_sset_info - string set information * @cmd: Command number = %ETHTOOL_GSSET_INFO + * @reserved: Reserved for future use; see the note on reserved space. * @sset_mask: On entry, a bitmask of string sets to query, with bits * numbered according to &enum ethtool_stringset. On return, a * bitmask of those string sets queried that are supported. @@@ -761,7 -759,6 +761,7 @@@ enum ethtool_test_flags * @flags: A bitmask of flags from &enum ethtool_test_flags. Some * flags may be set by the user on entry; others may be set by * the driver on return. + * @reserved: Reserved for future use; see the note on reserved space. * @len: On return, the number of test results * @data: Array of test results * @@@ -962,7 -959,6 +962,7 @@@ union ethtool_flow_union * @vlan_etype: VLAN EtherType * @vlan_tci: VLAN tag control information * @data: user defined data + * @padding: Reserved for future use; see the note on reserved space. 
* * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT * is set in &struct ethtool_rx_flow_spec @flow_type. @@@ -1138,8 -1134,7 +1138,8 @@@ struct ethtool_rxfh_indir * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. - * @rsvd: Reserved for future extensions. + * @rsvd8: Reserved for future use; see the note on reserved space. + * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table * of @indir_size __u32 elements, followed by hash key of @key_size * bytes. @@@ -1307,9 -1302,7 +1307,9 @@@ struct ethtool_sfeatures * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @tx_reserved: Reserved for future use; see the note on reserved space. * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + * @rx_reserved: Reserved for future use; see the note on reserved space. * * The bits in the 'tx_types' and 'rx_filters' fields correspond to * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, @@@ -1383,15 -1376,33 +1383,33 @@@ struct ethtool_per_queue_op };
/** - * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * struct ethtool_fecparam - Ethernet Forward Error Correction parameters * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM - * @active_fec: FEC mode which is active on porte - * @fec: Bitmask of supported/configured FEC modes - * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * @active_fec: FEC mode which is active on the port, single bit set, GET only. + * @fec: Bitmask of configured FEC modes. + * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. * - * Drivers should reject a non-zero setting of @autoneg when - * autoneogotiation is disabled (or not supported) for the link. + * Note that @reserved was never validated on input and ethtool user space + * left it uninitialized when calling SET. Hence going forward it can only be + * used to return a value to userspace with GET. + * + * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. + * FEC settings are configured by link autonegotiation whenever it's enabled. + * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. + * + * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. + * It is recommended that drivers only accept a single bit set in @fec. + * When multiple bits are set in @fec drivers may pick mode in an implementation + * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other + * FEC modes, because it's unclear whether in this case other modes constrain + * AUTO or are independent choices. + * Drivers must reject SET requests if they support none of the requested modes. + * + * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead + * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. * + * See enum ethtool_fec_config_bits for definition of valid bits for both + * @fec and @active_fec. */ struct ethtool_fecparam { __u32 cmd; @@@ -1403,11 -1414,16 +1421,16 @@@
/** * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration - * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported - * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver - * @ETHTOOL_FEC_OFF: No FEC Mode - * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode - * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not + * be used together with other bits. GET only. + * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually + * based link mode and SFP parameters read from module's + * EEPROM. This bit does _not_ mean autonegotiation. + * @ETHTOOL_FEC_OFF_BIT: No FEC Mode + * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode + * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode + * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet + * Consortium) */ enum ethtool_fec_config_bits { ETHTOOL_FEC_NONE_BIT, @@@ -1965,11 -1981,6 +1988,11 @@@ enum ethtool_reset_flags * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. + * @reserved: Reserved for future use; see the note on reserved space. + * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. 
* * If autonegotiation is disabled, the speed and @duplex represent the * fixed link mode and are writable if the driver supports multiple diff --combined init/Kconfig index 8588d1b0a42c,5deae45b8d81..76fceba69b4e --- a/init/Kconfig +++ b/init/Kconfig @@@ -1,4 -1,13 +1,4 @@@ # SPDX-License-Identifier: GPL-2.0-only -config DEFCONFIG_LIST - string - depends on !UML - option defconfig_list - default "/lib/modules/$(shell,uname -r)/.config" - default "/etc/kernel-config" - default "/boot/config-$(shell,uname -r)" - default "arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)" - config CC_VERSION_TEXT string default "$(CC_VERSION_TEXT)" @@@ -32,18 -41,6 +32,18 @@@ config CLANG_VERSIO default $(cc-version) if CC_IS_CLANG default 0
+config AS_IS_GNU + def_bool $(success,test "$(as-name)" = GNU) + +config AS_IS_LLVM + def_bool $(success,test "$(as-name)" = LLVM) + +config AS_VERSION + int + # Use clang version if this is the integrated assembler + default CLANG_VERSION if AS_IS_LLVM + default $(as-version) + config LD_IS_BFD def_bool $(success,test "$(ld-name)" = BFD)
@@@ -1711,6 -1708,7 +1711,7 @@@ config BPF_SYSCAL select BPF select IRQ_WORK select TASKS_TRACE_RCU + select NET_SOCK_MSG if INET default n help Enable the bpf() system call that allows to manipulate eBPF @@@ -1780,6 -1778,7 +1781,6 @@@ config DEBUG_RSE
config EMBEDDED bool "Embedded system" - option allnoconfig_y select EXPERT help This option should be enabled if compiling the kernel for @@@ -2054,7 -2053,7 +2055,7 @@@ config MODULE_SIG_FORMA
menuconfig MODULES bool "Enable loadable module support" - option modules + modules help Kernel modules are small pieces of compiled code which can be inserted in the running kernel, rather than being @@@ -2215,53 -2214,40 +2216,53 @@@ config MODULE_SIG_HAS default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512
-config MODULE_COMPRESS - bool "Compress modules on installation" +choice + prompt "Module compression mode" help + This option allows you to choose the algorithm which will be used to + compress modules when 'make modules_install' is run. (or, you can + choose to not compress modules at all.)
- Compresses kernel modules when 'make modules_install' is run; gzip or - xz depending on "Compression algorithm" below. + External modules will also be compressed in the same way during the + installation.
- module-init-tools MAY support gzip, and kmod MAY support gzip and xz. + For modules inside an initrd or initramfs, it's more efficient to + compress the whole initrd or initramfs instead.
- Out-of-tree kernel modules installed using Kbuild will also be - compressed upon installation. + This is fully compatible with signed modules.
- Note: for modules inside an initrd or initramfs, it's more efficient - to compress the whole initrd or initramfs instead. + Please note that the tool used to load modules needs to support the + corresponding algorithm. module-init-tools MAY support gzip, and kmod + MAY support gzip, xz and zstd.
- Note: This is fully compatible with signed modules. + Your build system needs to provide the appropriate compression tool + to compress the modules.
- If in doubt, say N. + If in doubt, select 'None'.
-choice - prompt "Compression algorithm" - depends on MODULE_COMPRESS - default MODULE_COMPRESS_GZIP +config MODULE_COMPRESS_NONE + bool "None" help - This determines which sort of compression will be used during - 'make modules_install'. - - GZIP (default) and XZ are supported. + Do not compress modules. The installed modules are suffixed + with .ko.
config MODULE_COMPRESS_GZIP bool "GZIP" + help + Compress modules with GZIP. The installed modules are suffixed + with .ko.gz.
config MODULE_COMPRESS_XZ bool "XZ" + help + Compress modules with XZ. The installed modules are suffixed + with .ko.xz. + +config MODULE_COMPRESS_ZSTD + bool "ZSTD" + help + Compress modules with ZSTD. The installed modules are suffixed + with .ko.zst.
endchoice
diff --combined kernel/bpf/disasm.c index faa54d58972c,dad821c8ecd0..bbfc6bb79240 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@@ -19,16 -19,23 +19,23 @@@ static const char *__func_get_name(cons { BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
- if (insn->src_reg != BPF_PSEUDO_CALL && + if (!insn->src_reg && insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && func_id_str[insn->imm]) return func_id_str[insn->imm];
- if (cbs && cbs->cb_call) - return cbs->cb_call(cbs->private_data, insn); + if (cbs && cbs->cb_call) { + const char *res; + + res = cbs->cb_call(cbs->private_data, insn); + if (res) + return res; + }
if (insn->src_reg == BPF_PSEUDO_CALL) snprintf(buff, len, "%+d", insn->imm); + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + snprintf(buff, len, "kernel-function");
return buff; } @@@ -84,7 -91,7 +91,7 @@@ static const char *const bpf_atomic_alu [BPF_ADD >> 4] = "add", [BPF_AND >> 4] = "and", [BPF_OR >> 4] = "or", - [BPF_XOR >> 4] = "or", + [BPF_XOR >> 4] = "xor", };
static const char *const bpf_ldst_string[] = { diff --combined kernel/bpf/verifier.c index 3a738724a380,852541a435ef..f63b27574b3a --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -234,6 -234,18 +234,18 @@@ static bool bpf_pseudo_call(const struc insn->src_reg == BPF_PSEUDO_CALL; }
+ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) + { + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == BPF_PSEUDO_KFUNC_CALL; + } + + static bool bpf_pseudo_func(const struct bpf_insn *insn) + { + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; + } + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@@ -248,6 -260,7 +260,7 @@@ u32 btf_id; struct btf *ret_btf; u32 ret_btf_id; + u32 subprogno; };
struct btf *btf_vmlinux; @@@ -390,6 -403,24 +403,24 @@@ __printf(3, 4) static void verbose_linf env->prev_linfo = linfo; }
+ static void verbose_invalid_scalar(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + struct tnum *range, const char *ctx, + const char *reg_name) + { + char tn_buf[48]; + + verbose(env, "At %s the register %s ", ctx, reg_name); + if (!tnum_is_unknown(reg->var_off)) { + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "has value %s", tn_buf); + } else { + verbose(env, "has unknown scalar value"); + } + tnum_strn(tn_buf, sizeof(tn_buf), *range); + verbose(env, " should have been in %s\n", tn_buf); + } + static bool type_is_pkt_pointer(enum bpf_reg_type type) { return type == PTR_TO_PACKET || @@@ -409,6 -440,7 +440,7 @@@ static bool reg_type_not_null(enum bpf_ return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || + type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON; }
@@@ -451,7 -483,8 +483,8 @@@ static bool arg_type_may_be_null(enum b type == ARG_PTR_TO_MEM_OR_NULL || type == ARG_PTR_TO_CTX_OR_NULL || type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || + type == ARG_PTR_TO_STACK_OR_NULL; }
/* Determine whether the function releases some resources allocated by another @@@ -541,6 -574,8 +574,8 @@@ static const char * const reg_type_str[ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", [PTR_TO_RDWR_BUF] = "rdwr_buf", [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", };
static char slot_type_char[] = { @@@ -612,6 -647,7 +647,7 @@@ static void print_verifier_state(struc if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_KEY || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) verbose(env, ",ks=%d,vs=%d", @@@ -1519,39 -1555,210 +1555,210 @@@ static int add_subprog(struct bpf_verif } ret = find_subprog(env, off); if (ret >= 0) - return 0; + return ret; if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { verbose(env, "too many subprograms\n"); return -E2BIG; } + /* determine subprog starts. The end is one before the next starts */ env->subprog_info[env->subprog_cnt++].start = off; sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL); + return env->subprog_cnt - 1; + } + + struct bpf_kfunc_desc { + struct btf_func_model func_model; + u32 func_id; + s32 imm; + }; + + #define MAX_KFUNC_DESCS 256 + struct bpf_kfunc_desc_tab { + struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; + u32 nr_descs; + }; + + static int kfunc_desc_cmp_by_id(const void *a, const void *b) + { + const struct bpf_kfunc_desc *d0 = a; + const struct bpf_kfunc_desc *d1 = b; + + /* func_id is not greater than BTF_MAX_TYPE */ + return d0->func_id - d1->func_id; + } + + static const struct bpf_kfunc_desc * + find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) + { + struct bpf_kfunc_desc desc = { + .func_id = func_id, + }; + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + return bsearch(&desc, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); + } + + static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) + { + const struct btf_type *func, *func_proto; + struct bpf_kfunc_desc_tab *tab; + struct bpf_prog_aux *prog_aux; + struct bpf_kfunc_desc *desc; + const char *func_name; + unsigned long addr; + int err; + + prog_aux = env->prog->aux; + tab = prog_aux->kfunc_tab; + if (!tab) { + if (!btf_vmlinux) { + verbose(env, 
"calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); + return -ENOTSUPP; + } + + if (!env->prog->jit_requested) { + verbose(env, "JIT is required for calling kernel function\n"); + return -ENOTSUPP; + } + + if (!bpf_jit_supports_kfunc_call()) { + verbose(env, "JIT does not support calling kernel function\n"); + return -ENOTSUPP; + } + + if (!env->prog->gpl_compatible) { + verbose(env, "cannot call kernel function from non-GPL compatible program\n"); + return -EINVAL; + } + + tab = kzalloc(sizeof(*tab), GFP_KERNEL); + if (!tab) + return -ENOMEM; + prog_aux->kfunc_tab = tab; + } + + if (find_kfunc_desc(env->prog, func_id)) + return 0; + + if (tab->nr_descs == MAX_KFUNC_DESCS) { + verbose(env, "too many different kernel function calls\n"); + return -E2BIG; + } + + func = btf_type_by_id(btf_vmlinux, func_id); + if (!func || !btf_type_is_func(func)) { + verbose(env, "kernel btf_id %u is not a function\n", + func_id); + return -EINVAL; + } + func_proto = btf_type_by_id(btf_vmlinux, func->type); + if (!func_proto || !btf_type_is_func_proto(func_proto)) { + verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", + func_id); + return -EINVAL; + } + + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + addr = kallsyms_lookup_name(func_name); + if (!addr) { + verbose(env, "cannot find address for kernel function %s\n", + func_name); + return -EINVAL; + } + + desc = &tab->descs[tab->nr_descs++]; + desc->func_id = func_id; + desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base; + err = btf_distill_func_proto(&env->log, btf_vmlinux, + func_proto, func_name, + &desc->func_model); + if (!err) + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_id, NULL); + return err; + } + + static int kfunc_desc_cmp_by_imm(const void *a, const void *b) + { + const struct bpf_kfunc_desc *d0 = a; + const struct bpf_kfunc_desc *d1 = b; + + if (d0->imm > d1->imm) + return 1; + else if (d0->imm < d1->imm) + return -1; 
return 0; }
- static int check_subprogs(struct bpf_verifier_env *env) + static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) + { + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + if (!tab) + return; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_imm, NULL); + } + + bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) + { + return !!prog->aux->kfunc_tab; + } + + const struct btf_func_model * + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn) + { + const struct bpf_kfunc_desc desc = { + .imm = insn->imm, + }; + const struct bpf_kfunc_desc *res; + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + res = bsearch(&desc, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); + + return res ? &res->func_model : NULL; + } + + static int add_subprog_and_kfunc(struct bpf_verifier_env *env) { - int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; + int i, ret, insn_cnt = env->prog->len;
/* Add entry function. */ ret = add_subprog(env, 0); - if (ret < 0) + if (ret) return ret;
- /* determine subprog starts. The end is one before the next starts */ - for (i = 0; i < insn_cnt; i++) { - if (!bpf_pseudo_call(insn + i)) + for (i = 0; i < insn_cnt; i++, insn++) { + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) && + !bpf_pseudo_kfunc_call(insn)) continue; + if (!env->bpf_capable) { - verbose(env, - "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); return -EPERM; } - ret = add_subprog(env, i + insn[i].imm + 1); + + if (bpf_pseudo_func(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + if (ret >= 0) + /* remember subprog */ + insn[1].imm = ret; + } else if (bpf_pseudo_call(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + } else { + ret = add_kfunc_call(env, insn->imm); + } + if (ret < 0) return ret; } @@@ -1565,6 -1772,16 +1772,16 @@@ for (i = 0; i < env->subprog_cnt; i++) verbose(env, "func#%d @%d\n", i, subprog[i].start);
+ return 0; + } + + static int check_subprogs(struct bpf_verifier_env *env) + { + int i, subprog_start, subprog_end, off, cur_subprog = 0; + struct bpf_subprog_info *subprog = env->subprog_info; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + /* now check that all jumps are within the same subprog */ subprog_start = subprog[cur_subprog].start; subprog_end = subprog[cur_subprog + 1].start; @@@ -1873,6 -2090,17 +2090,17 @@@ static int get_prev_insn_idx(struct bpf return i; }
+ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) + { + const struct btf_type *func; + + if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) + return NULL; + + func = btf_type_by_id(btf_vmlinux, insn->imm); + return btf_name_by_offset(btf_vmlinux, func->name_off); + } + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@@ -1881,6 -2109,7 +2109,7 @@@ static int backtrack_insn(struct bpf_ve u32 *reg_mask, u64 *stack_mask) { const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, .cb_print = verbose, .private_data = env, }; @@@ -2295,6 -2524,8 +2524,8 @@@ static bool is_spillable_regtype(enum b case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: case PTR_TO_MEM_OR_NULL: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: return true; default: return false; @@@ -2899,6 -3130,10 +3130,10 @@@ static int __check_mem_access(struct bp
reg = &cur_regs(env)[regno]; switch (reg->type) { + case PTR_TO_MAP_KEY: + verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", + mem_size, off, size); + break; case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); @@@ -3304,6 -3539,9 +3539,9 @@@ static int check_ptr_alignment(struct b case PTR_TO_FLOW_KEYS: pointer_desc = "flow keys "; break; + case PTR_TO_MAP_KEY: + pointer_desc = "key "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@@ -3405,7 -3643,7 +3643,7 @@@ process_func continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { - if (!bpf_pseudo_call(insn + i)) + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ ret_insn[frame] = i + 1; @@@ -3842,7 -4080,19 +4080,19 @@@ static int check_mem_access(struct bpf_ /* for access checks, reg->off is just part of off */ off += reg->off;
- if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_KEY) { + if (t == BPF_WRITE) { + verbose(env, "write to change key R%d not allowed\n", regno); + return -EACCES; + } + + err = check_mem_region_access(env, regno, off, size, + reg->map_ptr->key_size, false); + if (err) + return err; + if (value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@@ -4258,6 -4508,9 +4508,9 @@@ static int check_helper_mem_access(stru case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_MAP_KEY: + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, regno, reg->off, access_size, meta && meta->raw_mode ? BPF_WRITE : @@@ -4474,6 -4727,7 +4727,7 @@@ static const struct bpf_reg_types map_k PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4505,6 -4759,7 +4759,7 @@@ static const struct bpf_reg_types mem_t PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, PTR_TO_RDONLY_BUF, @@@ -4517,6 -4772,7 +4772,7 @@@ static const struct bpf_reg_types int_p PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4529,6 -4785,8 +4785,8 @@@ static const struct bpf_reg_types const static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; + static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; + static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@@ -4557,6 -4815,8 +4815,8 @@@ [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, };
static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@@ -4738,6 -4998,8 +4998,8 @@@ skip_type_check verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_FUNC) { + meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. @@@ -5258,13 -5520,19 +5520,19 @@@ static void clear_caller_saved_regs(str } }
- static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) + typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx); + + static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) { struct bpf_verifier_state *state = env->cur_state; struct bpf_func_info_aux *func_info_aux; struct bpf_func_state *caller, *callee; - int i, err, subprog, target_insn; + int err; bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) { @@@ -5273,14 -5541,6 +5541,6 @@@ return -E2BIG; }
- target_insn = *insn_idx + insn->imm; - subprog = find_subprog(env, target_insn + 1); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn + 1); - return -EFAULT; - } - caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", @@@ -5291,7 -5551,7 +5551,7 @@@ func_info_aux = env->prog->aux->func_info_aux; if (func_info_aux) is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, subprog, caller->regs); + err = btf_check_subprog_arg_match(env, subprog, caller->regs); if (err == -EFAULT) return err; if (is_global) { @@@ -5335,11 -5595,9 +5595,9 @@@ if (err) return err;
- /* copy r1 - r5 args that callee can access. The copy includes parent - * pointers, which connects us up to the liveness chain - */ - for (i = BPF_REG_1; i <= BPF_REG_5; i++) - callee->regs[i] = caller->regs[i]; + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) + return err;
clear_caller_saved_regs(env, caller->regs);
@@@ -5347,7 -5605,7 +5605,7 @@@ state->curframe++;
/* and go analyze first insn of the callee */ - *insn_idx = target_insn; + *insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); @@@ -5358,6 -5616,92 +5616,92 @@@ return 0; }
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee) + { + /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, + * void *callback_ctx, u64 flags); + * callback_fn(struct bpf_map *map, void *key, void *value, + * void *callback_ctx); + */ + callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + /* pointer to stack or null */ + callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + return 0; + } + + static int set_callee_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, int insn_idx) + { + int i; + + /* copy r1 - r5 args that callee can access. The copy includes parent + * pointers, which connects us up to the liveness chain + */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + callee->regs[i] = caller->regs[i]; + return 0; + } + + static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) + { + int subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. 
No program starts at insn %d\n", + target_insn); + return -EFAULT; + } + + return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); + } + + static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) + { + struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map; + int err; + + if (bpf_map_ptr_poisoned(insn_aux)) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + + map = BPF_MAP_PTR(insn_aux->map_ptr_state); + if (!map->ops->map_set_for_each_callback_args || + !map->ops->map_for_each_callback) { + verbose(env, "callback function not allowed for map\n"); + return -ENOTSUPP; + } + + err = map->ops->map_set_for_each_callback_args(env, caller, callee); + if (err) + return err; + + callee->in_callback_fn = true; + return 0; + } + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@@ -5380,8 -5724,22 +5724,22 @@@
state->curframe--; caller = state->frame[state->curframe]; - /* return to the caller whatever r0 had in the callee */ - caller->regs[BPF_REG_0] = *r0; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); + + if (r0->type != SCALAR_VALUE) { + verbose(env, "R0 not a scalar value\n"); + return -EACCES; + } + if (!tnum_in(range, r0->var_off)) { + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + }
/* Transfer references to the caller */ err = transfer_reference_state(caller, callee); @@@ -5436,7 -5794,9 +5794,9 @@@ record_func_map(struct bpf_verifier_en func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem && - func_id != BPF_FUNC_map_peek_elem) + func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_for_each_map_elem && + func_id != BPF_FUNC_redirect_map) return 0;
if (map == NULL) { @@@ -5517,15 -5877,18 +5877,18 @@@ static int check_reference_leak(struct return state->acquired_refs ? -EINVAL : 0; }
- static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; bool changes_data; - int i, err; + int i, err, func_id;
/* find function prototype */ + func_id = insn->imm; if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); @@@ -5571,7 -5934,7 +5934,7 @@@
meta.func_id = func_id; /* check args */ - for (i = 0; i < 5; i++) { + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { err = check_func_arg(env, i, &meta, fn); if (err) return err; @@@ -5621,6 -5984,13 +5984,13 @@@ return -EINVAL; }
+ if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@@ -5776,6 -6146,98 +6146,98 @@@ return 0; }
+ /* mark_btf_func_reg_size() is used when the reg size is determined by + * the BTF func_proto's return value size and argument. + */ + static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, + size_t reg_size) + { + struct bpf_reg_state *reg = &cur_regs(env)[regno]; + + if (regno == BPF_REG_0) { + /* Function return value */ + reg->live |= REG_LIVE_WRITTEN; + reg->subreg_def = reg_size == sizeof(u64) ? + DEF_NOT_SUBREG : env->insn_idx + 1; + } else { + /* Function argument */ + if (reg_size == sizeof(u64)) { + mark_insn_zext(env, reg); + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + } else { + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32); + } + } + } + + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) + { + const struct btf_type *t, *func, *func_proto, *ptr_type; + struct bpf_reg_state *regs = cur_regs(env); + const char *func_name, *ptr_type_name; + u32 i, nargs, func_id, ptr_type_id; + const struct btf_param *args; + int err; + + func_id = insn->imm; + func = btf_type_by_id(btf_vmlinux, func_id); + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_proto = btf_type_by_id(btf_vmlinux, func->type); + + if (!env->ops->check_kfunc_call || + !env->ops->check_kfunc_call(func_id)) { + verbose(env, "calling kernel function %s is not allowed\n", + func_name); + return -EACCES; + } + + /* Check the arguments */ + err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + if (err) + return err; + + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(env, regs, caller_saved[i]); + + /* Check return type */ + t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + if (btf_type_is_scalar(t)) { + mark_reg_unknown(env, regs, BPF_REG_0); + mark_btf_func_reg_size(env, BPF_REG_0, t->size); + } else if (btf_type_is_ptr(t)) { + ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + &ptr_type_id); + if (!btf_type_is_struct(ptr_type)) { + ptr_type_name = 
btf_name_by_offset(btf_vmlinux, + ptr_type->name_off); + verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", + func_name, btf_type_str(ptr_type), + ptr_type_name); + return -EINVAL; + } + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].type = PTR_TO_BTF_ID; + regs[BPF_REG_0].btf_id = ptr_type_id; + mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ + + nargs = btf_type_vlen(func_proto); + args = (const struct btf_param *)(func_proto + 1); + for (i = 0; i < nargs; i++) { + u32 regno = i + 1; + + t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + if (btf_type_is_ptr(t)) + mark_btf_func_reg_size(env, regno, sizeof(void *)); + else + /* scalar. ensured by btf_check_kfunc_arg_match() */ + mark_btf_func_reg_size(env, regno, t->size); + } + + return 0; + } + static bool signed_add_overflows(s64 a, s64 b) { /* Do the add in u64, where overflow is well-defined */ @@@ -5909,7 -6371,7 +6371,7 @@@ static int update_alu_sanitation_state( aux->alu_limit != alu_limit)) return -EACCES;
- /* Corresponding fixup done in fixup_bpf_calls(). */ + /* Corresponding fixup done in do_misc_fixups(). */ aux->alu_state = alu_state; aux->alu_limit = alu_limit; return 0; @@@ -8263,6 -8725,24 +8725,24 @@@ static int check_ld_imm(struct bpf_veri return 0; }
+ if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; + u32 subprogno = insn[1].imm; + + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); + return -EINVAL; + } + if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { + verbose(env, "callback function not static\n"); + return -EINVAL; + } + + dst_reg->type = PTR_TO_FUNC; + dst_reg->subprogno = subprogno; + return 0; + } + map = env->used_maps[aux->map_index]; mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; @@@ -8491,17 -8971,7 +8971,7 @@@ static int check_return_code(struct bpf }
if (!tnum_in(range, reg->var_off)) { - char tn_buf[48]; - - verbose(env, "At program exit the register R0 "); - if (!tnum_is_unknown(reg->var_off)) { - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "has value %s", tn_buf); - } else { - verbose(env, "has unknown scalar value"); - } - tnum_strn(tn_buf, sizeof(tn_buf), range); - verbose(env, " should have been in %s\n", tn_buf); + verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); return -EINVAL; }
@@@ -8628,6 -9098,27 +9098,27 @@@ static int push_insn(int t, int w, int return DONE_EXPLORING; }
+ static int visit_func_call_insn(int t, int insn_cnt, + struct bpf_insn *insns, + struct bpf_verifier_env *env, + bool visit_callee) + { + int ret; + + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + if (ret) + return ret; + + if (t + 1 < insn_cnt) + init_explored_state(env, t + 1); + if (visit_callee) { + init_explored_state(env, t); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, + env, false); + } + return ret; + } + /* Visits the instruction at index t and returns one of the following: * < 0 - an error occurred * DONE_EXPLORING - the instruction was fully explored @@@ -8638,6 -9129,9 +9129,9 @@@ static int visit_insn(int t, int insn_c struct bpf_insn *insns = env->prog->insnsi; int ret;
+ if (bpf_pseudo_func(insns + t)) + return visit_func_call_insn(t, insn_cnt, insns, env, true); + /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insns[t].code) != BPF_JMP && BPF_CLASS(insns[t].code) != BPF_JMP32) @@@ -8648,18 -9142,8 +9142,8 @@@ return DONE_EXPLORING;
case BPF_CALL: - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); - if (ret) - return ret; - - if (t + 1 < insn_cnt) - init_explored_state(env, t + 1); - if (insns[t].src_reg == BPF_PSEUDO_CALL) { - init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, - env, false); - } - return ret; + return visit_func_call_insn(t, insn_cnt, insns, env, + insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA: if (BPF_SRC(insns[t].code) != BPF_K) @@@ -9272,6 -9756,7 +9756,7 @@@ static bool regsafe(struct bpf_reg_stat */ return false; } + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. @@@ -9955,6 -10440,7 +10440,7 @@@ static int do_check(struct bpf_verifier
if (env->log.level & BPF_LOG_LEVEL) { const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, .cb_print = verbose, .private_data = env, }; @@@ -10102,7 -10588,8 +10588,8 @@@ if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 || (insn->src_reg != BPF_REG_0 && - insn->src_reg != BPF_PSEUDO_CALL) || + insn->src_reg != BPF_PSEUDO_CALL && + insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) { verbose(env, "BPF_CALL uses reserved fields\n"); @@@ -10117,11 -10604,12 +10604,12 @@@ } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + err = check_kfunc_call(env, insn); else - err = check_helper_call(env, insn->imm, env->insn_idx); + err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; - } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || @@@ -10550,6 -11038,12 +11038,12 @@@ static int resolve_pseudo_ldimm64(struc goto next_insn; }
+ if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@@ -10682,9 -11176,13 +11176,13 @@@ static void convert_pseudo_ld_imm64(str int insn_cnt = env->prog->len; int i;
- for (i = 0; i < insn_cnt; i++, insn++) - if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) - insn->src_reg = 0; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + if (insn->src_reg == BPF_PSEUDO_FUNC) + continue; + insn->src_reg = 0; + } }
/* single env->prog->insni[off] instruction was replaced with the range @@@ -11323,6 -11821,12 +11821,12 @@@ static int jit_subprogs(struct bpf_veri return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ + continue; + } + if (!bpf_pseudo_call(insn)) continue; /* Upon error here we cannot fall back to interpreter but @@@ -11412,6 -11916,7 +11916,7 @@@ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@@ -11452,6 -11957,12 +11957,12 @@@ for (i = 0; i < env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { + subprog = insn[1].imm; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; + } if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; @@@ -11497,6 -12008,11 +12008,11 @@@ * later look the same as if they were interpreted only. */ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + continue; + } if (!bpf_pseudo_call(insn)) continue; insn->off = env->insn_aux_data[i].call_imm; @@@ -11508,7 -12024,7 +12024,7 @@@ prog->bpf_func = func[0]->bpf_func; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; - bpf_prog_free_unused_jited_linfo(prog); + bpf_prog_jit_attempt_done(prog); return 0; out_free: for (i = 0; i < env->subprog_cnt; i++) { @@@ -11531,7 -12047,7 +12047,7 @@@ out_undo_insn insn->off = 0; insn->imm = env->insn_aux_data[i].call_imm; } - bpf_prog_free_jited_linfo(prog); + bpf_prog_jit_attempt_done(prog); return err; }
@@@ -11540,6 -12056,7 +12056,7 @@@ static int fixup_call_args(struct bpf_v #ifndef CONFIG_BPF_JIT_ALWAYS_ON struct bpf_prog *prog = env->prog; struct bpf_insn *insn = prog->insnsi; + bool has_kfunc_call = bpf_prog_has_kfunc_call(prog); int i, depth; #endif int err = 0; @@@ -11553,6 -12070,10 +12070,10 @@@ return err; } #ifndef CONFIG_BPF_JIT_ALWAYS_ON + if (has_kfunc_call) { + verbose(env, "calling kernel functions are not allowed in non-JITed programs\n"); + return -EINVAL; + } if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { /* When JIT fails the progs with bpf2bpf calls and tail_calls * have to be rejected, since interpreter doesn't support them yet. @@@ -11561,6 -12082,14 +12082,14 @@@ return -EINVAL; } for (i = 0; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* When JIT fails the progs with callback calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "callbacks are not allowed in non-JITed programs\n"); + return -EINVAL; + } + if (!bpf_pseudo_call(insn)) continue; depth = get_callee_stack_depth(env, insn, i); @@@ -11573,12 -12102,30 +12102,30 @@@ return err; }
- /* fixup insn->imm field of bpf_call instructions - * and inline eligible helpers as explicit sequence of BPF instructions - * - * this function is called after eBPF program passed verification + static int fixup_kfunc_call(struct bpf_verifier_env *env, + struct bpf_insn *insn) + { + const struct bpf_kfunc_desc *desc; + + /* insn->imm has the btf func_id. Replace it with + * an address (relative to __bpf_base_call). + */ + desc = find_kfunc_desc(env->prog, insn->imm); + if (!desc) { + verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", + insn->imm); + return -EFAULT; + } + + insn->imm = desc->imm; + + return 0; + } + + /* Do various post-verification rewrites in a single program pass. + * These rewrites simplify JIT and interpreter implementations. */ - static int fixup_bpf_calls(struct bpf_verifier_env *env) + static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; bool expect_blinding = bpf_jit_blinding_enabled(prog); @@@ -11593,6 -12140,7 +12140,7 @@@ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) { + /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || @@@ -11633,6 -12181,7 +12181,7 @@@ continue; }
+ /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { @@@ -11652,11 -12201,11 +12201,11 @@@ continue; }
+ /* Rewrite pointer arithmetic to mitigate speculation attacks. */ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; - struct bpf_insn insn_buf[16]; struct bpf_insn *patch = &insn_buf[0]; bool issrc, isneg; u32 off_reg; @@@ -11708,6 -12257,12 +12257,12 @@@ continue; if (insn->src_reg == BPF_PSEUDO_CALL) continue; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + ret = fixup_kfunc_call(env, insn); + if (ret) + return ret; + continue; + }
if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; @@@ -11800,7 -12355,8 +12355,8 @@@ insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || - insn->imm == BPF_FUNC_map_peek_elem)) { + insn->imm == BPF_FUNC_map_peek_elem || + insn->imm == BPF_FUNC_redirect_map)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@@ -11842,6 -12398,9 +12398,9 @@@ (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_redirect, + (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: @@@ -11868,11 -12427,16 +12427,16 @@@ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base; continue; + case BPF_FUNC_redirect_map: + insn->imm = BPF_CAST_CALL(ops->map_redirect) - + __bpf_call_base; + continue; }
goto patch_call_imm; }
+ /* Implement bpf_jiffies64 inline. */ if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_jiffies64) { struct bpf_insn ld_jiffies_addr[2] = { @@@ -11928,6 -12492,8 +12492,8 @@@ patch_call_imm } }
+ sort_kfunc_descs_by_imm(env->prog); + return 0; }
@@@ -12038,7 -12604,7 +12604,7 @@@ static int do_check_common(struct bpf_v /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); - ret = btf_check_func_arg_match(env, subprog, regs); + ret = btf_check_subprog_arg_match(env, subprog, regs); if (ret == -EFAULT) /* unlikely verifier bug. abort. * ret == 0 and ret < 0 are sadly acceptable for @@@ -12158,11 -12724,6 +12724,11 @@@ static int check_struct_ops_btf_id(stru u32 btf_id, member_idx; const char *mname;
+ if (!prog->gpl_compatible) { + verbose(env, "struct ops programs must have a GPL compatible license\n"); + return -EINVAL; + } + btf_id = prog->aux->attach_btf_id; st_ops = bpf_struct_ops_find(btf_id); if (!st_ops) { @@@ -12638,6 -13199,10 +13204,10 @@@ int bpf_check(struct bpf_prog **prog, u if (!env->explored_states) goto skip_full_check;
+ ret = add_subprog_and_kfunc(env); + if (ret < 0) + goto skip_full_check; + ret = check_subprogs(env); if (ret < 0) goto skip_full_check; @@@ -12688,7 -13253,7 +13258,7 @@@ skip_full_check ret = convert_ctx_accesses(env);
if (ret == 0) - ret = fixup_bpf_calls(env); + ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. diff --combined kernel/fork.c index 426cd0c51f9e,50209691f21a..85ca68f4b01e --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -96,6 -96,7 +96,7 @@@ #include <linux/kasan.h> #include <linux/scs.h> #include <linux/io_uring.h> + #include <linux/bpf.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@@ -734,6 -735,7 +735,7 @@@ void __put_task_struct(struct task_stru cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); + bpf_task_storage_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); @@@ -1948,14 -1950,8 +1950,14 @@@ static __latent_entropy struct task_str p = dup_task_struct(current, node); if (!p) goto fork_out; - if (args->io_thread) + if (args->io_thread) { + /* + * Mark us an IO worker, and block any signal that isn't + * fatal or STOP + */ p->flags |= PF_IO_WORKER; + siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); + }
/* * This _must_ happen before we call free_task(), i.e. before we jump @@@ -2078,6 -2074,9 +2080,9 @@@ p->sequential_io = 0; p->sequential_io_avg = 0; #endif + #ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(p->bpf_storage, NULL); + #endif
/* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); @@@ -2444,8 -2443,14 +2449,8 @@@ struct task_struct *create_io_thread(in .stack_size = (unsigned long)arg, .io_thread = 1, }; - struct task_struct *tsk;
- tsk = copy_process(NULL, 0, node, &args); - if (!IS_ERR(tsk)) { - sigfillset(&tsk->blocked); - sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); - } - return tsk; + return copy_process(NULL, 0, node, &args); }
/* diff --combined kernel/sysctl.c index 4b6b9de89da8,90d2892ef6a3..7a299e90f94b --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@@ -148,9 -148,6 +148,9 @@@ static unsigned long hung_task_timeout_ #ifdef CONFIG_INOTIFY_USER #include <linux/inotify.h> #endif +#ifdef CONFIG_FANOTIFY +#include <linux/fanotify.h> +#endif
#ifdef CONFIG_PROC_SYSCTL
@@@ -1037,6 -1034,65 +1037,65 @@@ int proc_douintvec_minmax(struct ctl_ta do_proc_douintvec_minmax_conv, &param); }
+ /** + * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars + * values from/to the user buffer, treated as an ASCII string. Negative + * strings are not allowed. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success or an error on write when the range check fails. + */ + int proc_dou8vec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { + struct ctl_table tmp; + unsigned int min = 0, max = 255U, val; + u8 *data = table->data; + struct do_proc_douintvec_minmax_conv_param param = { + .min = &min, + .max = &max, + }; + int res; + + /* Do not support arrays yet. */ + if (table->maxlen != sizeof(u8)) + return -EINVAL; + + if (table->extra1) { + min = *(unsigned int *) table->extra1; + if (min > 255U) + return -EINVAL; + } + if (table->extra2) { + max = *(unsigned int *) table->extra2; + if (max > 255U) + return -EINVAL; + } + + tmp = *table; + + tmp.maxlen = sizeof(val); + tmp.data = &val; + val = *data; + res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, + do_proc_douintvec_minmax_conv, &param); + if (res) + return res; + if (write) + *data = val; + return 0; + } + EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); + static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, unsigned int *valp, int write, void *data) @@@ -1585,6 -1641,12 +1644,12 @@@ int proc_douintvec_minmax(struct ctl_ta return -ENOSYS; }
+ int proc_dou8vec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { + return -ENOSYS; + } + int proc_dointvec_jiffies(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@@ -3261,14 -3323,7 +3326,14 @@@ static struct ctl_table fs_table[] = .mode = 0555, .child = inotify_table, }, -#endif +#endif +#ifdef CONFIG_FANOTIFY + { + .procname = "fanotify", + .mode = 0555, + .child = fanotify_table, + }, +#endif #ifdef CONFIG_EPOLL { .procname = "epoll", diff --combined net/core/skmsg.c index 5def3a2e85be,92a83c02562a..43ce17a6a585 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@@ -399,6 -399,104 +399,104 @@@ out } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+ int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, + long timeo, int *err) + { + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || + !skb_queue_empty(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; + } + EXPORT_SYMBOL_GPL(sk_msg_wait_data); + + /* Receive sk_msg from psock->ingress_msg to @msg. */ + int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + int len, int flags) + { + struct iov_iter *iter = &msg->msg_iter; + int peek = flags & MSG_PEEK; + struct sk_msg *msg_rx; + int i, copied = 0; + + msg_rx = sk_psock_peek_msg(psock); + while (copied != len) { + struct scatterlist *sge; + + if (unlikely(!msg_rx)) + break; + + i = msg_rx->sg.start; + do { + struct page *page; + int copy; + + sge = sk_msg_elem(msg_rx, i); + copy = sge->length; + page = sg_page(sge); + if (copied + copy > len) + copy = len - copied; + copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (!copy) + return copied ? copied : -EFAULT; + + copied += copy; + if (likely(!peek)) { + sge->offset += copy; + sge->length -= copy; + if (!msg_rx->skb) + sk_mem_uncharge(sk, copy); + msg_rx->sg.size -= copy; + + if (!sge->length) { + sk_msg_iter_var_next(i); + if (!msg_rx->skb) + put_page(page); + } + } else { + /* Lets not optimize peek case if copy_page_to_iter + * didn't copy the entire length lets just break. 
+ */ + if (copy != sge->length) + return copied; + sk_msg_iter_var_next(i); + } + + if (copied == len) + break; + } while (i != msg_rx->sg.end); + + if (unlikely(peek)) { + msg_rx = sk_psock_next_msg(psock, msg_rx); + if (!msg_rx) + break; + continue; + } + + msg_rx->sg.start = i; + if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + msg_rx = sk_psock_dequeue_msg(psock); + kfree_sk_msg(msg_rx); + } + msg_rx = sk_psock_peek_msg(psock); + } + + return copied; + } + EXPORT_SYMBOL_GPL(sk_msg_recvmsg); + static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { @@@ -410,7 -508,7 +508,7 @@@ if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); if (unlikely(!msg)) return NULL;
@@@ -488,7 -586,6 +586,7 @@@ static int sk_psock_skb_ingress_self(st if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); + skb_set_owner_r(skb, sk); return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); }
@@@ -498,7 -595,7 +596,7 @@@ static int sk_psock_handle_skb(struct s if (!ingress) { if (!sock_writeable(psock->sk)) return -EAGAIN; - return skb_send_sock_locked(psock->sk, skb, off, len); + return skb_send_sock(psock->sk, skb, off, len); } return sk_psock_skb_ingress(psock, skb); } @@@ -512,8 -609,7 +610,7 @@@ static void sk_psock_backlog(struct wor u32 len, off; int ret;
- /* Lock sock to avoid losing sk_socket during loop. */ - lock_sock(psock->sk); + mutex_lock(&psock->work_mutex); if (state->skb) { skb = state->skb; len = state->len; @@@ -526,10 -622,11 +623,11 @@@ len = skb->len; off = 0; start: - ingress = tcp_skb_bpf_ingress(skb); + ingress = skb_bpf_ingress(skb); + skb_bpf_redirect_clear(skb); do { ret = -EIO; - if (likely(psock->sk->sk_socket)) + if (!sock_flag(psock->sk, SOCK_DEAD)) ret = sk_psock_handle_skb(psock, skb, off, len, ingress); if (ret <= 0) { @@@ -553,7 -650,7 +651,7 @@@ kfree_skb(skb); } end: - release_sock(psock->sk); + mutex_unlock(&psock->work_mutex); }
struct sk_psock *sk_psock_init(struct sock *sk, int node) @@@ -563,11 -660,6 +661,6 @@@
write_lock_bh(&sk->sk_callback_lock);
- if (inet_csk_has_ulp(sk)) { - psock = ERR_PTR(-EINVAL); - goto out; - } - if (sk->sk_user_data) { psock = ERR_PTR(-EBUSY); goto out; @@@ -591,7 -683,9 +684,9 @@@ spin_lock_init(&psock->link_lock);
INIT_WORK(&psock->work, sk_psock_backlog); + mutex_init(&psock->work_mutex); INIT_LIST_HEAD(&psock->ingress_msg); + spin_lock_init(&psock->ingress_lock); skb_queue_head_init(&psock->ingress_skb);
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); @@@ -619,7 -713,7 +714,7 @@@ struct sk_psock_link *sk_psock_link_pop return link; }
- void __sk_psock_purge_ingress_msg(struct sk_psock *psock) + static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) { struct sk_msg *msg, *tmp;
@@@ -630,9 -724,14 +725,14 @@@ } }
- static void sk_psock_zap_ingress(struct sk_psock *psock) + static void __sk_psock_zap_ingress(struct sk_psock *psock) { - __skb_queue_purge(&psock->ingress_skb); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) { + skb_bpf_redirect_clear(skb); + kfree_skb(skb); + } __sk_psock_purge_ingress_msg(psock); }
@@@ -646,23 -745,35 +746,35 @@@ static void sk_psock_link_destroy(struc } }
- static void sk_psock_destroy_deferred(struct work_struct *gc) + void sk_psock_stop(struct sk_psock *psock, bool wait) { - struct sk_psock *psock = container_of(gc, struct sk_psock, gc); + spin_lock_bh(&psock->ingress_lock); + sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); + sk_psock_cork_free(psock); + __sk_psock_zap_ingress(psock); + spin_unlock_bh(&psock->ingress_lock);
+ if (wait) + cancel_work_sync(&psock->work); + } + + static void sk_psock_done_strp(struct sk_psock *psock); + + static void sk_psock_destroy(struct work_struct *work) + { + struct sk_psock *psock = container_of(to_rcu_work(work), + struct sk_psock, rwork); /* No sk_callback_lock since already detached. */
- /* Parser has been stopped */ - if (psock->progs.skb_parser) - strp_done(&psock->parser.strp); + sk_psock_done_strp(psock);
cancel_work_sync(&psock->work); + mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
sk_psock_link_destroy(psock); sk_psock_cork_free(psock); - sk_psock_zap_ingress(psock);
if (psock->sk_redir) sock_put(psock->sk_redir); @@@ -670,30 -781,21 +782,21 @@@ kfree(psock); }
- static void sk_psock_destroy(struct rcu_head *rcu) - { - struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu); - - INIT_WORK(&psock->gc, sk_psock_destroy_deferred); - schedule_work(&psock->gc); - } - void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_cork_free(psock); - sk_psock_zap_ingress(psock); + sk_psock_stop(psock, false);
write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); - if (psock->progs.skb_parser) + if (psock->progs.stream_parser) sk_psock_stop_strp(sk, psock); - else if (psock->progs.skb_verdict) + else if (psock->progs.stream_verdict || psock->progs.skb_verdict) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); - sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- call_rcu(&psock->rcu, sk_psock_destroy); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); + queue_rcu_work(system_wq, &psock->rwork); } EXPORT_SYMBOL_GPL(sk_psock_drop);
@@@ -744,27 -846,12 +847,12 @@@ out } EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
- static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog, - struct sk_buff *skb) - { - bpf_compute_data_end_sk_skb(skb); - return bpf_prog_run_pin_on_cpu(prog, skb); - } - - static struct sk_psock *sk_psock_from_strp(struct strparser *strp) - { - struct sk_psock_parser *parser; - - parser = container_of(strp, struct sk_psock_parser, strp); - return container_of(parser, struct sk_psock, parser); - } - static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other;
- sk_other = tcp_skb_bpf_redirect_fetch(skb); + sk_other = skb_bpf_redirect_fetch(skb); /* This error is a buggy BPF program, it returned a redirect * return code, but then didn't set a redirect interface. */ @@@ -777,20 -864,27 +865,26 @@@ * error that caused the pipe to break. We can't send a packet on * a socket that is in this state so we drop the skb. */ - if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || - !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { + if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) { + kfree_skb(skb); + return; + } + spin_lock_bh(&psock_other->ingress_lock); + if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { + spin_unlock_bh(&psock_other->ingress_lock); kfree_skb(skb); return; }
skb_queue_tail(&psock_other->ingress_skb, skb); schedule_work(&psock_other->work); + spin_unlock_bh(&psock_other->ingress_lock); }
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict) { switch (verdict) { case __SK_REDIRECT: - skb_set_owner_r(skb, sk); sk_psock_skb_redirect(skb); break; case __SK_PASS: @@@ -806,12 -900,17 +900,13 @@@ int sk_psock_tls_strp_read(struct sk_ps int ret = __SK_PASS;
rcu_read_lock(); - prog = READ_ONCE(psock->progs.skb_verdict); + prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { - /* We skip full set_owner_r here because if we do a SK_PASS - * or SK_DROP we can skip skb memory accounting and use the - * TLS context. - */ skb->sk = psock->sk; - tcp_skb_bpf_redirect_clear(skb); - ret = sk_psock_bpf_run(psock, prog, skb); - ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + skb_dst_drop(skb); + skb_bpf_redirect_clear(skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } sk_psock_tls_verdict_apply(skb, psock->sk, ret); @@@ -823,7 -922,6 +918,6 @@@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_rea static void sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, int verdict) { - struct tcp_skb_cb *tcp; struct sock *sk_other; int err = -EIO;
@@@ -835,8 -933,7 +929,7 @@@ goto out_free; }
- tcp = TCP_SKB_CB(skb); - tcp->bpf.flags |= BPF_F_INGRESS; + skb_bpf_set_ingress(skb);
/* If the queue is empty then we can submit directly * into the msg queue. If its not empty we have to @@@ -848,8 -945,12 +941,12 @@@ err = sk_psock_skb_ingress_self(psock, skb); } if (err < 0) { - skb_queue_tail(&psock->ingress_skb, skb); - schedule_work(&psock->work); + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + skb_queue_tail(&psock->ingress_skb, skb); + schedule_work(&psock->work); + } + spin_unlock_bh(&psock->ingress_lock); } break; case __SK_REDIRECT: @@@ -862,6 -963,24 +959,24 @@@ out_free } }
+ static void sk_psock_write_space(struct sock *sk) + { + struct sk_psock *psock; + void (*write_space)(struct sock *sk) = NULL; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) { + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + schedule_work(&psock->work); + write_space = psock->saved_write_space; + } + rcu_read_unlock(); + if (write_space) + write_space(sk); + } + + #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { struct sk_psock *psock; @@@ -876,13 -995,13 +991,14 @@@ kfree_skb(skb); goto out; } - prog = READ_ONCE(psock->progs.skb_verdict); - skb_set_owner_r(skb, sk); + prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { + skb->sk = sk; - tcp_skb_bpf_redirect_clear(skb); - ret = sk_psock_bpf_run(psock, prog, skb); - ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + skb_dst_drop(skb); + skb_bpf_redirect_clear(skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); + skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: @@@ -896,15 -1015,15 +1012,15 @@@ static int sk_psock_strp_read_done(stru
static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock = container_of(strp, struct sk_psock, strp); struct bpf_prog *prog; int ret = skb->len;
rcu_read_lock(); - prog = READ_ONCE(psock->progs.skb_parser); + prog = READ_ONCE(psock->progs.stream_parser); if (likely(prog)) { skb->sk = psock->sk; - ret = sk_psock_bpf_run(psock, prog, skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); skb->sk = NULL; } rcu_read_unlock(); @@@ -920,16 -1039,59 +1036,59 @@@ static void sk_psock_strp_data_ready(st psock = sk_psock(sk); if (likely(psock)) { if (tls_sw_has_ctx_rx(sk)) { - psock->parser.saved_data_ready(sk); + psock->saved_data_ready(sk); } else { write_lock_bh(&sk->sk_callback_lock); - strp_data_ready(&psock->parser.strp); + strp_data_ready(&psock->strp); write_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); }
+ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) + { + static const struct strp_callbacks cb = { + .rcv_msg = sk_psock_strp_read, + .read_sock_done = sk_psock_strp_read_done, + .parse_msg = sk_psock_strp_parse, + }; + + return strp_init(&psock->strp, sk, &cb); + } + + void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) + { + if (psock->saved_data_ready) + return; + + psock->saved_data_ready = sk->sk_data_ready; + sk->sk_data_ready = sk_psock_strp_data_ready; + sk->sk_write_space = sk_psock_write_space; + } + + void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) + { + if (!psock->saved_data_ready) + return; + + sk->sk_data_ready = psock->saved_data_ready; + psock->saved_data_ready = NULL; + strp_stop(&psock->strp); + } + + static void sk_psock_done_strp(struct sk_psock *psock) + { + /* Parser has been stopped */ + if (psock->progs.stream_parser) + strp_done(&psock->strp); + } + #else + static void sk_psock_done_strp(struct sk_psock *psock) + { + } + #endif /* CONFIG_BPF_STREAM_PARSER */ + static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, unsigned int offset, size_t orig_len) { @@@ -953,13 -1115,15 +1112,16 @@@ kfree_skb(skb); goto out; } - prog = READ_ONCE(psock->progs.skb_verdict); - skb_set_owner_r(skb, sk); + prog = READ_ONCE(psock->progs.stream_verdict); + if (!prog) + prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { + skb->sk = sk; - tcp_skb_bpf_redirect_clear(skb); - ret = sk_psock_bpf_run(psock, prog, skb); - ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + skb_dst_drop(skb); + skb_bpf_redirect_clear(skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); + skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: @@@ -982,82 -1146,21 +1144,21 @@@ static void sk_psock_verdict_data_ready sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv); }
- static void sk_psock_write_space(struct sock *sk) - { - struct sk_psock *psock; - void (*write_space)(struct sock *sk) = NULL; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) { - if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - schedule_work(&psock->work); - write_space = psock->saved_write_space; - } - rcu_read_unlock(); - if (write_space) - write_space(sk); - } - - int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) - { - static const struct strp_callbacks cb = { - .rcv_msg = sk_psock_strp_read, - .read_sock_done = sk_psock_strp_read_done, - .parse_msg = sk_psock_strp_parse, - }; - - psock->parser.enabled = false; - return strp_init(&psock->parser.strp, sk, &cb); - } - void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) { - struct sk_psock_parser *parser = &psock->parser; - - if (parser->enabled) + if (psock->saved_data_ready) return;
- parser->saved_data_ready = sk->sk_data_ready; + psock->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = sk_psock_verdict_data_ready; sk->sk_write_space = sk_psock_write_space; - parser->enabled = true; - } - - void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) - { - struct sk_psock_parser *parser = &psock->parser; - - if (parser->enabled) - return; - - parser->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_strp_data_ready; - sk->sk_write_space = sk_psock_write_space; - parser->enabled = true; - } - - void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) - { - struct sk_psock_parser *parser = &psock->parser; - - if (!parser->enabled) - return; - - sk->sk_data_ready = parser->saved_data_ready; - parser->saved_data_ready = NULL; - strp_stop(&parser->strp); - parser->enabled = false; }
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) { - struct sk_psock_parser *parser = &psock->parser; - - if (!parser->enabled) + if (!psock->saved_data_ready) return;
- sk->sk_data_ready = parser->saved_data_ready; - parser->saved_data_ready = NULL; - parser->enabled = false; + sk->sk_data_ready = psock->saved_data_ready; + psock->saved_data_ready = NULL; } diff --combined net/ethtool/ioctl.c index 771688e1b0da,26b3e7086075..a9f67574148f --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@@ -426,13 -426,29 +426,13 @@@ struct ethtool_link_usettings int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { - const struct link_mode_info *link_info; - int err; - ASSERT_RTNL();
if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP;
memset(link_ksettings, 0, sizeof(*link_ksettings)); - - link_ksettings->link_mode = -1; - err = dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); - if (err) - return err; - - if (link_ksettings->link_mode != -1) { - link_info = &link_mode_params[link_ksettings->link_mode]; - link_ksettings->base.speed = link_info->speed; - link_ksettings->lanes = link_info->lanes; - link_ksettings->base.duplex = link_info->duplex; - } - - return 0; + return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); } EXPORT_SYMBOL(__ethtool_get_link_ksettings);
@@@ -1828,6 -1844,18 +1828,18 @@@ out return ret; }
+ __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...) + { + va_list args; + + va_start(args, fmt); + vsnprintf(*data, ETH_GSTRING_LEN, fmt, args); + va_end(args); + + *data += ETH_GSTRING_LEN; + } + EXPORT_SYMBOL(ethtool_sprintf); + static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; @@@ -2540,6 -2568,9 +2552,9 @@@ static int ethtool_get_fecparam(struct if (rc) return rc;
+ if (WARN_ON_ONCE(fecparam.reserved)) + fecparam.reserved = 0; + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) return -EFAULT; return 0; @@@ -2555,6 -2586,12 +2570,12 @@@ static int ethtool_set_fecparam(struct if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) return -EFAULT;
+ if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE) + return -EINVAL; + + fecparam.active_fec = 0; + fecparam.reserved = 0; + return dev->ethtool_ops->set_fecparam(dev, &fecparam); }
diff --combined net/ipv4/esp4.c index 4b834bbf95e0,1ae920b93f39..dd1c752ea122 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@@ -279,7 -279,7 +279,7 @@@ static void esp_output_done(struct cryp x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } }
@@@ -309,7 -309,7 +309,7 @@@ static struct ip_esp_hdr *esp_output_se struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { @@@ -854,7 -854,7 +854,7 @@@ static void esp_input_set_header(struc struct ip_esp_hdr *esph;
/* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { diff --combined net/ipv4/udp.c index 99d743eb9dc4,bfcc7f1a8a7f..15f5504adf5b --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@@ -1782,6 -1782,35 +1782,35 @@@ busy_check } EXPORT_SYMBOL(__skb_recv_udp);
+ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) + { + int copied = 0; + + while (1) { + struct sk_buff *skb; + int err, used; + + skb = skb_recv_udp(sk, 0, 1, &err); + if (!skb) + return err; + used = recv_actor(desc, skb, 0, skb->len); + if (used <= 0) { + if (!copied) + copied = used; + break; + } else if (used <= skb->len) { + copied += used; + } + + if (!desc->count) + break; + } + + return copied; + } + EXPORT_SYMBOL(udp_read_sock); + /* * This should be easy, if there is something there we * return it, otherwise we block. @@@ -2178,6 -2207,8 +2207,8 @@@ static int udp_queue_rcv_skb(struct soc segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + + udp_post_segment_fix_csum(skb); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); @@@ -2664,9 -2695,12 +2695,12 @@@ int udp_lib_setsockopt(struct sock *sk
case UDP_GRO: lock_sock(sk); + + /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk->sk_socket); up->gro_enabled = valbool; + up->accept_udp_l4 = valbool; release_sock(sk); break;
@@@ -2754,10 -2788,6 +2788,10 @@@ int udp_lib_getsockopt(struct sock *sk val = up->gso_size; break;
+ case UDP_GRO: + val = up->gro_enabled; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: @@@ -2853,6 -2883,9 +2887,9 @@@ struct proto udp_prot = .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, + #ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = udp_bpf_update_proto, + #endif .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), diff --combined net/ipv6/ip6_vti.c index e0cc32e45880,856e46ad0895..2d048e21abbb --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@@ -193,7 -193,6 +193,6 @@@ static int vti6_tnl_create2(struct net_
strcpy(t->parms.name, dev->name);
- dev_hold(dev); vti6_tnl_link(ip6n, t);
return 0; @@@ -494,7 -493,7 +493,7 @@@ vti6_xmit(struct sk_buff *skb, struct n }
if (dst->flags & DST_XFRM_QUEUE) - goto queued; + goto xmit;
x = dst->xfrm; if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) @@@ -523,8 -522,6 +522,8 @@@
icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { + if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } @@@ -533,7 -530,7 +532,7 @@@ goto tx_err_dst_release; }
-queued: +xmit: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; @@@ -934,6 -931,7 +933,7 @@@ static inline int vti6_dev_init_gen(str dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + dev_hold(dev); return 0; }
@@@ -965,7 -963,6 +965,6 @@@ static int __net_init vti6_fb_tnl_dev_i struct vti6_net *ip6n = net_generic(net, vti6_net_id);
t->parms.proto = IPPROTO_IPV6; - dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --combined net/mptcp/protocol.c index 4bde960e19dc,e894345d10c1..8009b3f8e4c1 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@@ -11,6 -11,7 +11,6 @@@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> -#include <linux/igmp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@@ -19,6 -20,7 +19,6 @@@ #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> -#include <net/addrconf.h> #endif #include <net/mptcp.h> #include <net/xfrm.h> @@@ -491,7 -493,7 +491,7 @@@ static bool mptcp_check_data_fin(struc u64 rcv_data_fin_seq; bool ret = false;
- if (__mptcp_check_fallback(msk) || !msk->first) + if (__mptcp_check_fallback(msk)) return ret;
/* Need to ack a DATA_FIN received from a peer while this side @@@ -2045,28 -2047,21 +2045,21 @@@ out_err return copied; }
- static void mptcp_retransmit_handler(struct sock *sk) - { - struct mptcp_sock *msk = mptcp_sk(sk); - - set_bit(MPTCP_WORK_RTX, &msk->flags); - mptcp_schedule_work(sk); - } - static void mptcp_retransmit_timer(struct timer_list *t) { struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk);
bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - mptcp_retransmit_handler(sk); + /* we need a process context to retransmit */ + if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags)) + mptcp_schedule_work(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, - &sk->sk_tsq_flags)) - sock_hold(sk); + set_bit(MPTCP_RETRANSMIT, &msk->flags); } bh_unlock_sock(sk); sock_put(sk); @@@ -2876,48 -2871,6 +2869,48 @@@ static int mptcp_setsockopt_v6(struct m return ret; }
+static bool mptcp_unsupported(int level, int optname) +{ + if (level == SOL_IP) { + switch (optname) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_UNBLOCK_SOURCE: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + if (level == SOL_IPV6) { + switch (optname) { + case IPV6_ADDRFORM: + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + return false; +} + static int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@@ -2926,9 -2879,6 +2919,9 @@@
pr_debug("msk=%p", msk);
+ if (mptcp_unsupported(level, optname)) + return -ENOPROTOOPT; + if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
@@@ -3001,17 -2951,16 +2994,16 @@@ void __mptcp_check_push(struct sock *sk } }
- #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) - /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) { - unsigned long flags, nflags; - for (;;) { - flags = 0; + unsigned long flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) flags |= BIT(MPTCP_PUSH_PENDING); + if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_RETRANSMIT); if (!flags) break;
@@@ -3026,6 -2975,8 +3018,8 @@@ spin_unlock_bh(&sk->sk_lock.slock); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_RETRANSMIT)) + __mptcp_retrans(sk);
cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@@ -3041,20 -2992,6 +3035,6 @@@ */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); - - do { - flags = sk->sk_tsq_flags; - if (!(flags & MPTCP_DEFERRED_ALL)) - return; - nflags = flags & ~MPTCP_DEFERRED_ALL; - } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); - - sock_release_ownership(sk); - - if (flags & TCPF_WRITE_TIMER_DEFERRED) { - mptcp_retransmit_handler(sk); - __sock_put(sk); - } }
void mptcp_subflow_process_delegated(struct sock *ssk) @@@ -3153,14 -3090,18 +3133,18 @@@ bool mptcp_finish_join(struct sock *ssk pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */ - if (!mptcp_is_fully_established(parent)) + if (!mptcp_is_fully_established(parent)) { + subflow->reset_reason = MPTCP_RST_EMPTCP; return false; + }
if (!msk->pm.server_side) goto out;
- if (!mptcp_pm_allow_new_subflow(msk)) + if (!mptcp_pm_allow_new_subflow(msk)) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + }
/* active connections are already on conn_list, and we can't acquire * msk lock here. @@@ -3174,8 -3115,10 +3158,10 @@@ sock_hold(ssk); } spin_unlock_bh(&msk->join_list_lock); - if (!ret) + if (!ret) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + }
/* attach to msk socket only after we are sure he will deal with us * at close time @@@ -3287,8 -3230,12 +3273,12 @@@ static int mptcp_stream_connect(struct if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) mptcp_subflow_early_fallback(msk, subflow); #endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) + if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { + MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); + } + if (likely(!__mptcp_check_fallback(msk))) + MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); @@@ -3462,10 -3409,34 +3452,10 @@@ static __poll_t mptcp_poll(struct file return mask; }
-static int mptcp_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = sock->sk; - struct mptcp_sock *msk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - } - - release_sock(sk); - - return inet_release(sock); -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, - .release = mptcp_release, + .release = inet_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, @@@ -3557,10 -3528,35 +3547,10 @@@ void __init mptcp_proto_init(void }
#if IS_ENABLED(CONFIG_MPTCP_IPV6) -static int mptcp6_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk; - struct sock *sk = sock->sk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - ipv6_sock_mc_close(ssk); - ipv6_sock_ac_close(ssk); - } - - release_sock(sk); - return inet6_release(sock); -} - static const struct proto_ops mptcp_v6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, - .release = mptcp6_release, + .release = inet6_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, diff --combined net/netfilter/nf_conntrack_standalone.c index c6c0cb465664,3f2cc7b04b20..5deddb0947a9 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@@ -266,7 -266,6 +266,7 @@@ static const char* l4proto_name(u16 pro case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; + case IPPROTO_ICMPV6: return "icmpv6"; }
return "unknown"; @@@ -1028,6 -1027,7 +1028,7 @@@ static void nf_conntrack_standalone_ini
static int nf_conntrack_standalone_init_sysctl(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct nf_udp_net *un = nf_udp_pernet(net); struct ctl_table *table;
@@@ -1073,8 -1073,8 +1074,8 @@@ table[NF_SYSCTL_CT_BUCKETS].mode = 0444; }
- net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); - if (!net->ct.sysctl_header) + cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table); + if (!cnet->sysctl_header) goto out_unregister_netfilter;
return 0; @@@ -1086,10 -1086,11 +1087,11 @@@ out_unregister_netfilter
static void nf_conntrack_standalone_fini_sysctl(struct net *net) { + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct ctl_table *table;
- table = net->ct.sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.sysctl_header); + table = cnet->sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(cnet->sysctl_header); kfree(table); } #else diff --combined net/netfilter/nf_flow_table_offload.c index 1c5460e7bce8,7d0d128407be..4f1a145ff74b --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@@ -13,7 -13,9 +13,9 @@@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h>
- static struct workqueue_struct *nf_flow_offload_wq; + static struct workqueue_struct *nf_flow_offload_add_wq; + static struct workqueue_struct *nf_flow_offload_del_wq; + static struct workqueue_struct *nf_flow_offload_stats_wq;
struct flow_offload_work { struct list_head list; @@@ -175,28 -177,45 +177,45 @@@ static int flow_offload_eth_src(struct enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { - const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple; struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); - struct net_device *dev; + const struct flow_offload_tuple *other_tuple, *this_tuple; + struct net_device *dev = NULL; + const unsigned char *addr; u32 mask, val; u16 val16;
- dev = dev_get_by_index(net, tuple->iifidx); - if (!dev) - return -ENOENT; + this_tuple = &flow->tuplehash[dir].tuple; + + switch (this_tuple->xmit_type) { + case FLOW_OFFLOAD_XMIT_DIRECT: + addr = this_tuple->out.h_source; + break; + case FLOW_OFFLOAD_XMIT_NEIGH: + other_tuple = &flow->tuplehash[!dir].tuple; + dev = dev_get_by_index(net, other_tuple->iifidx); + if (!dev) + return -ENOENT; + + addr = dev->dev_addr; + break; + default: + return -EOPNOTSUPP; + }
mask = ~0xffff0000; - memcpy(&val16, dev->dev_addr, 2); + memcpy(&val16, addr, 2); val = val16 << 16; flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, &val, &mask);
mask = ~0xffffffff; - memcpy(&val, dev->dev_addr + 2, 4); + memcpy(&val, addr + 2, 4); flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, &val, &mask); - dev_put(dev); + + if (dev) + dev_put(dev);
return 0; } @@@ -208,27 -227,40 +227,40 @@@ static int flow_offload_eth_dst(struct { struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); - const void *daddr = &flow->tuplehash[!dir].tuple.src_v4; + const struct flow_offload_tuple *other_tuple, *this_tuple; const struct dst_entry *dst_cache; unsigned char ha[ETH_ALEN]; struct neighbour *n; + const void *daddr; u32 mask, val; u8 nud_state; u16 val16;
- dst_cache = flow->tuplehash[dir].tuple.dst_cache; - n = dst_neigh_lookup(dst_cache, daddr); - if (!n) - return -ENOENT; + this_tuple = &flow->tuplehash[dir].tuple;
- read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(ha, n->ha); - read_unlock_bh(&n->lock); - - if (!(nud_state & NUD_VALID)) { + switch (this_tuple->xmit_type) { + case FLOW_OFFLOAD_XMIT_DIRECT: + ether_addr_copy(ha, this_tuple->out.h_dest); + break; + case FLOW_OFFLOAD_XMIT_NEIGH: + other_tuple = &flow->tuplehash[!dir].tuple; + daddr = &other_tuple->src_v4; + dst_cache = this_tuple->dst_cache; + n = dst_neigh_lookup(dst_cache, daddr); + if (!n) + return -ENOENT; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(ha, n->ha); + read_unlock_bh(&n->lock); neigh_release(n); - return -ENOENT; + + if (!(nud_state & NUD_VALID)) + return -ENOENT; + break; + default: + return -EOPNOTSUPP; }
mask = ~0xffffffff; @@@ -241,7 -273,6 +273,6 @@@ val = val16; flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, &val, &mask); - neigh_release(n);
return 0; } @@@ -305,12 -336,12 +336,12 @@@ static void flow_offload_ipv6_mangle(st const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; - int i; + int i, j;
- for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) { + for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, - offset + i, &addr[i], mask); + offset + i, &addr[j], mask); } }
@@@ -463,27 -494,52 +494,52 @@@ static void flow_offload_ipv4_checksum( } }
- static void flow_offload_redirect(const struct flow_offload *flow, + static void flow_offload_redirect(struct net *net, + const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { - struct flow_action_entry *entry = flow_action_entry_next(flow_rule); - struct rtable *rt; + const struct flow_offload_tuple *this_tuple, *other_tuple; + struct flow_action_entry *entry; + struct net_device *dev; + int ifindex; + + this_tuple = &flow->tuplehash[dir].tuple; + switch (this_tuple->xmit_type) { + case FLOW_OFFLOAD_XMIT_DIRECT: + this_tuple = &flow->tuplehash[dir].tuple; + ifindex = this_tuple->out.hw_ifidx; + break; + case FLOW_OFFLOAD_XMIT_NEIGH: + other_tuple = &flow->tuplehash[!dir].tuple; + ifindex = other_tuple->iifidx; + break; + default: + return; + }
- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; + dev = dev_get_by_index(net, ifindex); + if (!dev) + return; + + entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_REDIRECT; - entry->dev = rt->dst.dev; - dev_hold(rt->dst.dev); + entry->dev = dev; }
static void flow_offload_encap_tunnel(const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + const struct flow_offload_tuple *this_tuple; struct flow_action_entry *entry; struct dst_entry *dst;
- dst = flow->tuplehash[dir].tuple.dst_cache; + this_tuple = &flow->tuplehash[dir].tuple; + if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) + return; + + dst = this_tuple->dst_cache; if (dst && dst->lwtstate) { struct ip_tunnel_info *tun_info;
@@@ -500,10 -556,15 +556,15 @@@ static void flow_offload_decap_tunnel(c enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + const struct flow_offload_tuple *other_tuple; struct flow_action_entry *entry; struct dst_entry *dst;
- dst = flow->tuplehash[!dir].tuple.dst_cache; + other_tuple = &flow->tuplehash[!dir].tuple; + if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) + return; + + dst = other_tuple->dst_cache; if (dst && dst->lwtstate) { struct ip_tunnel_info *tun_info;
@@@ -515,10 -576,14 +576,14 @@@ } }
- int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) + static int + nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { + const struct flow_offload_tuple *other_tuple; + int i; + flow_offload_decap_tunnel(flow, dir, flow_rule); flow_offload_encap_tunnel(flow, dir, flow_rule);
@@@ -526,6 -591,39 +591,39 @@@ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1;
+ other_tuple = &flow->tuplehash[!dir].tuple; + + for (i = 0; i < other_tuple->encap_num; i++) { + struct flow_action_entry *entry; + + if (other_tuple->in_vlan_ingress & BIT(i)) + continue; + + entry = flow_action_entry_next(flow_rule); + + switch (other_tuple->encap[i].proto) { + case htons(ETH_P_PPP_SES): + entry->id = FLOW_ACTION_PPPOE_PUSH; + entry->pppoe.sid = other_tuple->encap[i].id; + break; + case htons(ETH_P_8021Q): + entry->id = FLOW_ACTION_VLAN_PUSH; + entry->vlan.vid = other_tuple->encap[i].id; + entry->vlan.proto = other_tuple->encap[i].proto; + break; + } + } + + return 0; + } + + int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) + { + if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0) + return -1; + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { flow_offload_ipv4_snat(net, flow, dir, flow_rule); flow_offload_port_snat(net, flow, dir, flow_rule); @@@ -538,7 -636,7 +636,7 @@@ test_bit(NF_FLOW_DNAT, &flow->flags)) flow_offload_ipv4_checksum(net, flow, flow_rule);
- flow_offload_redirect(flow, dir, flow_rule); + flow_offload_redirect(net, flow, dir, flow_rule);
return 0; } @@@ -548,11 -646,7 +646,7 @@@ int nf_flow_rule_route_ipv6(struct net enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { - flow_offload_decap_tunnel(flow, dir, flow_rule); - flow_offload_encap_tunnel(flow, dir, flow_rule); - - if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || - flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) + if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0) return -1;
if (test_bit(NF_FLOW_SNAT, &flow->flags)) { @@@ -564,7 -658,7 +658,7 @@@ flow_offload_port_dnat(net, flow, dir, flow_rule); }
- flow_offload_redirect(flow, dir, flow_rule); + flow_offload_redirect(net, flow, dir, flow_rule);
return 0; } @@@ -578,10 -672,10 +672,10 @@@ nf_flow_offload_rule_alloc(struct net * enum flow_offload_tuple_dir dir) { const struct nf_flowtable *flowtable = offload->flowtable; + const struct flow_offload_tuple *tuple, *other_tuple; const struct flow_offload *flow = offload->flow; - const struct flow_offload_tuple *tuple; + struct dst_entry *other_dst = NULL; struct nf_flow_rule *flow_rule; - struct dst_entry *other_dst; int err = -ENOMEM;
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); @@@ -597,7 -691,10 +691,10 @@@ flow_rule->rule->match.key = &flow_rule->match.key;
tuple = &flow->tuplehash[dir].tuple; - other_dst = flow->tuplehash[!dir].tuple.dst_cache; + other_tuple = &flow->tuplehash[!dir].tuple; + if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) + other_dst = other_tuple->dst_cache; + err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst); if (err < 0) goto err_flow_match; @@@ -826,7 -923,12 +923,12 @@@ static void flow_offload_work_handler(s
static void flow_offload_queue_work(struct flow_offload_work *offload) { - queue_work(nf_flow_offload_wq, &offload->work); + if (offload->cmd == FLOW_CLS_REPLACE) + queue_work(nf_flow_offload_add_wq, &offload->work); + else if (offload->cmd == FLOW_CLS_DESTROY) + queue_work(nf_flow_offload_del_wq, &offload->work); + else + queue_work(nf_flow_offload_stats_wq, &offload->work); }
static struct flow_offload_work * @@@ -898,8 -1000,11 +1000,11 @@@ void nf_flow_offload_stats(struct nf_fl
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { - if (nf_flowtable_hw_offload(flowtable)) - flush_workqueue(nf_flow_offload_wq); + if (nf_flowtable_hw_offload(flowtable)) { + flush_workqueue(nf_flow_offload_add_wq); + flush_workqueue(nf_flow_offload_del_wq); + flush_workqueue(nf_flow_offload_stats_wq); + } }
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, @@@ -1011,15 -1116,33 +1116,33 @@@ EXPORT_SYMBOL_GPL(nf_flow_table_offload
int nf_flow_table_offload_init(void) { - nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload", - WQ_UNBOUND, 0); - if (!nf_flow_offload_wq) + nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add", + WQ_UNBOUND | WQ_SYSFS, 0); + if (!nf_flow_offload_add_wq) return -ENOMEM;
+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del", + WQ_UNBOUND | WQ_SYSFS, 0); + if (!nf_flow_offload_del_wq) + goto err_del_wq; + + nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats", + WQ_UNBOUND | WQ_SYSFS, 0); + if (!nf_flow_offload_stats_wq) + goto err_stats_wq; + return 0; + + err_stats_wq: + destroy_workqueue(nf_flow_offload_del_wq); + err_del_wq: + destroy_workqueue(nf_flow_offload_add_wq); + return -ENOMEM; }
void nf_flow_table_offload_exit(void) { - destroy_workqueue(nf_flow_offload_wq); + destroy_workqueue(nf_flow_offload_add_wq); + destroy_workqueue(nf_flow_offload_del_wq); + destroy_workqueue(nf_flow_offload_stats_wq); } diff --combined net/openvswitch/conntrack.c index d217bd91176b,c29b0ef1fc27..cadb6a29b285 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@@ -809,8 -809,7 +809,7 @@@ static int ovs_ct_nat_execute(struct sk
err = nf_nat_packet(ct, ctinfo, hooknum, skb); push: - skb_push(skb, nh_off); - skb_postpush_rcsum(skb, skb->data, nh_off); + skb_push_rcsum(skb, nh_off);
return err; } @@@ -1322,8 -1321,7 +1321,7 @@@ int ovs_ct_execute(struct net *net, str else err = ovs_ct_lookup(net, key, info, skb);
- skb_push(skb, nh_ofs); - skb_postpush_rcsum(skb, skb->data, nh_ofs); + skb_push_rcsum(skb, nh_ofs); if (err) kfree_skb(skb); return err; @@@ -2034,10 -2032,10 +2032,10 @@@ static int ovs_ct_limit_del_zone_limit( static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info, struct sk_buff *reply) { - struct ovs_zone_limit zone_limit; - - zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE; - zone_limit.limit = info->default_limit; + struct ovs_zone_limit zone_limit = { + .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, + .limit = info->default_limit, + };
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); } diff --combined net/qrtr/qrtr.c index 1e4fb568fa84,4b46c69e14ab..c0477bec09bd --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@@ -20,6 -20,8 +20,8 @@@ /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff + #define QRTR_EPH_PORT_RANGE \ + XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
/** * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 @@@ -106,8 -108,7 +108,7 @@@ static LIST_HEAD(qrtr_all_nodes) static DEFINE_MUTEX(qrtr_node_lock);
/* local port allocation management */ - static DEFINE_IDR(qrtr_ports); - static DEFINE_MUTEX(qrtr_port_lock); + static DEFINE_XARRAY_ALLOC(qrtr_ports);
/** * struct qrtr_node - endpoint node @@@ -271,10 -272,7 +272,10 @@@ static int qrtr_tx_wait(struct qrtr_nod flow = kzalloc(sizeof(*flow), GFP_KERNEL); if (flow) { init_waitqueue_head(&flow->resume_tx); - radix_tree_insert(&node->qrtr_tx_flow, key, flow); + if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { + kfree(flow); + flow = NULL; + } } } mutex_unlock(&node->qrtr_tx_lock); @@@ -656,7 -654,7 +657,7 @@@ static struct qrtr_sock *qrtr_port_look port = 0;
rcu_read_lock(); - ipc = idr_find(&qrtr_ports, port); + ipc = xa_load(&qrtr_ports, port); if (ipc) sock_hold(&ipc->sk); rcu_read_unlock(); @@@ -698,9 -696,7 +699,7 @@@ static void qrtr_port_remove(struct qrt
__sock_put(&ipc->sk);
- mutex_lock(&qrtr_port_lock); - idr_remove(&qrtr_ports, port); - mutex_unlock(&qrtr_port_lock); + xa_erase(&qrtr_ports, port);
/* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ @@@ -719,29 -715,20 +718,20 @@@ */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { - u32 min_port; int rc;
- mutex_lock(&qrtr_port_lock); if (!*port) { - min_port = QRTR_MIN_EPH_SOCKET; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE, + GFP_KERNEL); } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { - min_port = 0; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC); + rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL); } else { - min_port = *port; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL); } - mutex_unlock(&qrtr_port_lock);
- if (rc == -ENOSPC) + if (rc == -EBUSY) return -EADDRINUSE; else if (rc < 0) return rc; @@@ -755,20 -742,16 +745,16 @@@ static void qrtr_reset_ports(void) { struct qrtr_sock *ipc; - int id; - - mutex_lock(&qrtr_port_lock); - idr_for_each_entry(&qrtr_ports, ipc, id) { - /* Don't reset control port */ - if (id == 0) - continue; + unsigned long index;
+ rcu_read_lock(); + xa_for_each_start(&qrtr_ports, index, ipc, 1) { sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; ipc->sk.sk_error_report(&ipc->sk); sock_put(&ipc->sk); } - mutex_unlock(&qrtr_port_lock); + rcu_read_unlock(); }
/* Bind socket to address. diff --combined net/rds/send.c index fe5264b9d4b3,53444397de66..ee7214ea0fdb --- a/net/rds/send.c +++ b/net/rds/send.c @@@ -665,7 -665,7 +665,7 @@@ static void rds_send_remove_from_sock(s unlock_and_drop: spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); - if (was_on_sock) + if (was_on_sock && rm) rds_message_put(rm); }
@@@ -1225,7 -1225,7 +1225,7 @@@ int rds_sendmsg(struct socket *sock, st } /* If the socket is already bound to a link local address, * it can only send to peers on the same link. But allow - * communicating beween link local and non-link local address. + * communicating between link local and non-link local address. */ if (scope_id != rs->rs_bound_scope_id) { if (!scope_id) { diff --combined net/sched/cls_api.c index 9332ec6863e8,d3db70865d66..8d24aeb4f1ab --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@@ -646,7 -646,7 +646,7 @@@ static void tc_block_indr_cleanup(struc struct net_device *dev = block_cb->indr.dev; struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; - struct flow_block_offload bo; + struct flow_block_offload bo = {};
tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, @@@ -3662,6 -3662,9 +3662,9 @@@ int tc_setup_flow_action(struct flow_ac entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.burst_pkt = tcf_police_burst_pkt(act); + entry->police.rate_pkt_ps = + tcf_police_rate_pkt_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); entry->police.index = act->tcfa_index; } else if (is_tcf_ct(act)) { diff --combined net/tipc/crypto.c index 97710ce36047,76b8428c94a7..e5c43d4d5a75 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@@ -317,7 -317,7 +317,7 @@@ static int tipc_aead_key_generate(struc
#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \ do { \ - typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr), \ + struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \ lockdep_is_held(lock)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ tipc_aead_put(__tmp); \ @@@ -798,7 -798,7 +798,7 @@@ static int tipc_aead_encrypt(struct tip ehdr = (struct tipc_ehdr *)skb->data; salt = aead->salt; if (aead->mode == CLUSTER_KEY) - salt ^= ehdr->addr; /* __be32 */ + salt ^= __be32_to_cpu(ehdr->addr); else if (__dnode) salt ^= tipc_node_get_addr(__dnode); memcpy(iv, &salt, 4); @@@ -929,7 -929,7 +929,7 @@@ static int tipc_aead_decrypt(struct ne ehdr = (struct tipc_ehdr *)skb->data; salt = aead->salt; if (aead->mode == CLUSTER_KEY) - salt ^= ehdr->addr; /* __be32 */ + salt ^= __be32_to_cpu(ehdr->addr); else if (ehdr->destined) salt ^= tipc_own_addr(net); memcpy(iv, &salt, 4); @@@ -1492,6 -1492,8 +1492,8 @@@ int tipc_crypto_start(struct tipc_crypt /* Allocate statistic structure */ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { + if (c->wq) + destroy_workqueue(c->wq); kfree_sensitive(c); return -ENOMEM; } @@@ -1941,22 -1943,21 +1943,22 @@@ static void tipc_crypto_rcv_complete(st goto rcv; if (tipc_aead_clone(&tmp, aead) < 0) goto rcv; + WARN_ON(!refcount_inc_not_zero(&tmp->refcnt)); if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) { tipc_aead_free(&tmp->rcu); goto rcv; } tipc_aead_put(aead); - aead = tipc_aead_get((struct tipc_aead __force __rcu *)tmp); + aead = tmp; }
if (unlikely(err)) { - tipc_aead_users_dec(aead, INT_MIN); + tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN); goto free_skb; }
/* Set the RX key's user */ - tipc_aead_users_set(aead, 1); + tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
/* Mark this point, RX works */ rx->timer1 = jiffies; diff --combined net/tipc/net.c index faf6bf554514,3f927949bb23..a130195af188 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@@ -89,7 -89,7 +89,7 @@@ * - A spin lock to protect the registry of kernel/driver users (reg.c) * - A global spin_lock (tipc_port_lock), which only task is to ensure * consistency where more than one port is involved in an operation, - * i.e., whe a port is part of a linked list of ports. + * i.e., when a port is part of a linked list of ports. * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * @@@ -125,6 -125,11 +125,11 @@@ int tipc_net_init(struct net *net, u8 * static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); + struct tipc_socket_addr sk = {0, addr}; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + TIPC_NODE_STATE, addr, addr);
if (cmpxchg(&tn->node_addr, 0, addr)) return; @@@ -132,8 -137,7 +137,7 @@@ tipc_named_reinit(net); tipc_sk_reinit(net); tipc_mon_reinit_self(net); - tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr, - TIPC_CLUSTER_SCOPE, 0, addr); + tipc_nametbl_publish(net, &ua, &sk, addr); }
void tipc_net_finalize_work(struct work_struct *work) diff --combined net/tipc/node.c index e0ee83263a39,707d0dc71fad..8217905348f4 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@@ -372,42 -372,49 +372,49 @@@ static struct tipc_node *tipc_node_find }
static void tipc_node_read_lock(struct tipc_node *n) + __acquires(n->lock) { read_lock_bh(&n->lock); }
static void tipc_node_read_unlock(struct tipc_node *n) + __releases(n->lock) { read_unlock_bh(&n->lock); }
static void tipc_node_write_lock(struct tipc_node *n) + __acquires(n->lock) { write_lock_bh(&n->lock); }
static void tipc_node_write_unlock_fast(struct tipc_node *n) + __releases(n->lock) { write_unlock_bh(&n->lock); }
static void tipc_node_write_unlock(struct tipc_node *n) + __releases(n->lock) { + struct tipc_socket_addr sk; struct net *net = n->net; - u32 addr = 0; u32 flags = n->action_flags; - u32 link_id = 0; - u32 bearer_id; struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id;
if (likely(!flags)) { write_unlock_bh(&n->lock); return; }
- addr = n->addr; - link_id = n->link_id; - bearer_id = link_id & 0xffff; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = n->addr; + bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@@ -416,20 -423,18 +423,18 @@@ write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr, n->capabilities); + tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr, n->capabilities); + tipc_named_node_up(net, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, addr, bearer_id); - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, link_id); + tipc_mon_peer_up(net, n->addr, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, n->link_id); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, addr, bearer_id); - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - addr, link_id); + tipc_mon_peer_down(net, n->addr, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); } }
@@@ -1734,7 -1739,7 +1739,7 @@@ int tipc_node_xmit(struct net *net, str }
/* tipc_node_xmit_skb(): send single buffer to destination - * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected * The only exception is datagram messages rerouted after secondary * lookup, which are rare and safe to dispose of anyway. @@@ -2009,7 -2014,7 +2014,7 @@@ static bool tipc_node_check_state(struc return true; }
- /* No synching needed if only one link */ + /* No syncing needed if only one link */ if (!pl || !tipc_link_is_up(pl)) return true;
diff --combined net/tipc/socket.c index 022999e0202d,f21162aa0cf7..58935cd0d068 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@@ -3,7 -3,7 +3,7 @@@ * * Copyright (c) 2001-2007, 2012-2019, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@@ -111,7 -111,6 +111,6 @@@ struct tipc_sock struct sock sk; u32 conn_type; u32 conn_instance; - int published; u32 max_pkt; u32 maxnagle; u32 portid; @@@ -141,6 -140,7 +140,7 @@@ bool expect_ack; bool nodelay; bool group_is_open; + bool published; };
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@@ -151,10 -151,8 +151,8 @@@ static int tipc_release(struct socket * static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); static void tipc_sk_timeout(struct timer_list *t); - static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq); - static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq); + static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); + static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); @@@ -644,7 -642,7 +642,7 @@@ static int tipc_release(struct socket * __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); - tipc_sk_withdraw(tsk, 0, NULL); + tipc_sk_withdraw(tsk, NULL); __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@@ -677,22 -675,31 +675,31 @@@ */ static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + bool unbind = false;
if (unlikely(!alen)) - return tipc_sk_withdraw(tsk, 0, NULL); + return tipc_sk_withdraw(tsk, NULL);
- if (addr->addrtype == TIPC_SERVICE_ADDR) - addr->addr.nameseq.upper = addr->addr.nameseq.lower; + if (ua->addrtype == TIPC_SERVICE_ADDR) { + ua->addrtype = TIPC_SERVICE_RANGE; + ua->sr.upper = ua->sr.lower; + } + if (ua->scope < 0) { + unbind = true; + ua->scope = -ua->scope; + } + /* Users may still use deprecated TIPC_ZONE_SCOPE */ + if (ua->scope != TIPC_NODE_SCOPE) + ua->scope = TIPC_CLUSTER_SCOPE;
if (tsk->group) return -EACCES;
- if (addr->scope >= 0) - return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq); - else - return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); + if (unbind) + return tipc_sk_withdraw(tsk, ua); + return tipc_sk_publish(tsk, ua); }
int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) @@@ -707,18 -714,17 +714,17 @@@
static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + u32 atype = ua->addrtype;
if (alen) { - if (alen < sizeof(struct sockaddr_tipc)) + if (!tipc_uaddr_valid(ua, alen)) return -EINVAL; - if (addr->family != AF_TIPC) + if (atype == TIPC_SOCKET_ADDR) return -EAFNOSUPPORT; - if (addr->addrtype > TIPC_SERVICE_ADDR) - return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) { + if (ua->sr.type < TIPC_RESERVED_TYPES) { pr_warn_once("Can't bind to reserved service type %u\n", - addr->addr.nameseq.type); + ua->sr.type); return -EACCES; } } @@@ -826,7 -832,7 +832,7 @@@ static __poll_t tipc_poll(struct file * /** * tipc_sendmcast - send multicast message * @sock: socket structure - * @seq: destination address + * @ua: destination address struct * @msg: message to send * @dlen: length of data to send * @timeout: timeout to wait for wakeup @@@ -834,7 -840,7 +840,7 @@@ * Called from function tipc_sendmsg(), which has done all sanity checks * Return: the number of bytes sent on success, or errno */ - static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, + static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua, struct msghdr *msg, size_t dlen, long timeout) { struct sock *sk = sock->sk; @@@ -842,7 -848,6 +848,6 @@@ struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); - struct tipc_mc_method *method = &tsk->mc_method; struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@@ -857,8 -862,7 +862,7 @@@
/* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); - tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, &dsts); + tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH;
@@@ -868,9 -872,9 +872,9 @@@ msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); + msg_set_nametype(hdr, ua->sr.type); + msg_set_namelower(hdr, ua->sr.lower); + msg_set_nameupper(hdr, ua->sr.upper);
/* Build message as chain of buffers */ __skb_queue_head_init(&pkts); @@@ -880,7 -884,7 +884,7 @@@ if (unlikely(rc == dlen)) { trace_tipc_sk_sendmcast(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_mcast_xmit(net, &pkts, method, &dsts, + rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts, &tsk->cong_link_cnt); }
@@@ -954,7 -958,7 +958,7 @@@ static int tipc_send_group_unicast(stru int dlen, long timeout) { struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); @@@ -962,8 -966,8 +966,8 @@@ u32 node, port; int rc;
- node = dest->addr.id.node; - port = dest->addr.id.ref; + node = ua->sk.node; + port = ua->sk.ref; if (!port && !node) return -EHOSTUNREACH;
@@@ -997,7 -1001,7 +1001,7 @@@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct list_head *cong_links = &tsk->cong_links; @@@ -1008,16 -1012,13 +1012,13 @@@ struct net *net = sock_net(sk); u32 node, port, exclude; struct list_head dsts; - u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong;
INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr);
while (++lookups < 4) { exclude = tipc_group_exclude(tsk->group); @@@ -1026,8 -1027,8 +1027,8 @@@
/* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, false)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, + exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); cong = tipc_group_cong(tsk->group, node, port, blks, @@@ -1082,7 -1083,7 +1083,7 @@@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); @@@ -1107,9 -1108,9 +1108,9 @@@ return -EHOSTUNREACH;
/* Complete message header */ - if (dest) { + if (ua) { msg_set_type(hdr, TIPC_GRP_MCAST_MSG); - msg_set_nameinst(hdr, dest->addr.name.name.instance); + msg_set_nameinst(hdr, ua->sa.instance); } else { msg_set_type(hdr, TIPC_GRP_BCAST_MSG); msg_set_nameinst(hdr, 0); @@@ -1156,29 -1157,25 +1157,25 @@@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 type, inst, scope, exclude; struct list_head dsts; - u32 dstcnt; + u32 dstcnt, exclude;
INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp);
- if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, true)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true)) return -EHOSTUNREACH;
if (dstcnt == 1) { - tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref); return tipc_send_group_unicast(sock, m, dlen, timeout); }
@@@ -1198,17 -1195,18 +1195,18 @@@ void tipc_sk_mcast_rcv(struct net *net struct sk_buff_head *inputq) { u32 self = tipc_own_addr(net); - u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; u32 portid, onode; struct sk_buff_head tmpq; struct list_head dports; struct tipc_msg *hdr; + struct tipc_uaddr ua; int user, mtyp, hlen; bool exact;
__skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); + ua.addrtype = TIPC_SERVICE_RANGE;
skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { @@@ -1217,7 -1215,7 +1215,7 @@@ mtyp = msg_type(hdr); hlen = skb_headroom(skb) + msg_hdr_sz(hdr); onode = msg_orignode(hdr); - type = msg_nametype(hdr); + ua.sr.type = msg_nametype(hdr);
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); @@@ -1232,24 -1230,23 +1230,23 @@@
/* Group messages require exact scope match */ if (msg_in_group(hdr)) { - lower = 0; - upper = ~0; - scope = msg_lookup_scope(hdr); + ua.sr.lower = 0; + ua.sr.upper = ~0; + ua.scope = msg_lookup_scope(hdr); exact = true; } else { /* TIPC_NODE_SCOPE means "any scope" in this context */ if (onode == self) - scope = TIPC_NODE_SCOPE; + ua.scope = TIPC_NODE_SCOPE; else - scope = TIPC_CLUSTER_SCOPE; + ua.scope = TIPC_CLUSTER_SCOPE; exact = false; - lower = msg_namelower(hdr); - upper = msg_nameupper(hdr); + ua.sr.lower = msg_namelower(hdr); + ua.sr.upper = msg_nameupper(hdr); }
/* Create destination port list: */ - tipc_nametbl_mc_lookup(net, type, lower, upper, - scope, exact, &dports); + tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
/* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { @@@ -1265,7 -1262,7 +1262,7 @@@ spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - kfree_skb(__skb_dequeue(arrvq)); + __skb_dequeue(arrvq); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); @@@ -1417,44 -1414,43 +1414,43 @@@ static int __tipc_sendmsg(struct socke struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_service_range *seq; + struct tipc_socket_addr skaddr; struct sk_buff_head pkts; - u32 dport = 0, dnode = 0; - u32 type = 0, inst = 0; - int mtu, rc; + int atype, mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE;
- if (likely(dest)) { - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - if (unlikely(dest->family != AF_TIPC)) + if (ua) { + if (!tipc_uaddr_valid(ua, m->msg_namelen)) return -EINVAL; + atype = ua->addrtype; }
+ /* If socket belongs to a communication group follow other paths */ if (grp) { - if (!dest) + if (!ua) return tipc_send_group_bcast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_SERVICE_ADDR) + if (atype == TIPC_SERVICE_ADDR) return tipc_send_group_anycast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_SOCKET_ADDR) + if (atype == TIPC_SOCKET_ADDR) return tipc_send_group_unicast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_MCAST) + if (atype == TIPC_SERVICE_RANGE) return tipc_send_group_mcast(sock, m, dlen, timeout); return -EINVAL; }
- if (unlikely(!dest)) { - dest = &tsk->peer; - if (!syn && dest->family != AF_TIPC) + if (!ua) { + ua = (struct tipc_uaddr *)&tsk->peer; + if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; + atype = ua->addrtype; }
if (unlikely(syn)) { @@@ -1464,54 -1460,51 +1460,51 @@@ return -EISCONN; if (tsk->published) return -EOPNOTSUPP; - if (dest->addrtype == TIPC_SERVICE_ADDR) { - tsk->conn_type = dest->addr.name.name.type; - tsk->conn_instance = dest->addr.name.name.instance; + if (atype == TIPC_SERVICE_ADDR) { + tsk->conn_type = ua->sa.type; + tsk->conn_instance = ua->sa.instance; } msg_set_syn(hdr, 1); }
- seq = &dest->addr.nameseq; - if (dest->addrtype == TIPC_ADDR_MCAST) - return tipc_sendmcast(sock, seq, m, dlen, timeout); - - if (dest->addrtype == TIPC_SERVICE_ADDR) { - type = dest->addr.name.name.type; - inst = dest->addr.name.name.instance; - dnode = dest->addr.name.domain; - dport = tipc_nametbl_translate(net, type, inst, &dnode); - if (unlikely(!dport && !dnode)) + /* Determine destination */ + if (atype == TIPC_SERVICE_RANGE) { + return tipc_sendmcast(sock, ua, m, dlen, timeout); + } else if (atype == TIPC_SERVICE_ADDR) { + skaddr.node = ua->lookup_node; + ua->scope = tipc_node2scope(skaddr.node); + if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_SOCKET_ADDR) { - dnode = dest->addr.id.node; + } else if (atype == TIPC_SOCKET_ADDR) { + skaddr = ua->sk; } else { return -EINVAL; }
/* Block or return if destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, - !tipc_dest_find(clinks, dnode, 0)); + !tipc_dest_find(clinks, skaddr.node, 0)); if (unlikely(rc)) return rc;
- if (dest->addrtype == TIPC_SERVICE_ADDR) { + /* Finally build message header */ + msg_set_destnode(hdr, skaddr.node); + msg_set_destport(hdr, skaddr.ref); + if (atype == TIPC_SERVICE_ADDR) { msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); - msg_set_nametype(hdr, type); - msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dport); + msg_set_nametype(hdr, ua->sa.type); + msg_set_nameinst(hdr, ua->sa.instance); + msg_set_lookup_scope(hdr, ua->scope); } else { /* TIPC_SOCKET_ADDR */ msg_set_type(hdr, TIPC_DIRECT_MSG); msg_set_lookup_scope(hdr, 0); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dest->addr.id.ref); msg_set_hdr_sz(hdr, BASIC_H_SIZE); }
+ /* Add message body */ __skb_queue_head_init(&pkts); - mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true); + mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@@ -1520,10 -1513,11 +1513,11 @@@ return -ENOMEM; }
+ /* Send message */ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - tipc_dest_push(clinks, dnode, 0); + tipc_dest_push(clinks, skaddr.node, 0); tsk->cong_link_cnt++; rc = 0; } @@@ -2891,66 -2885,62 +2885,62 @@@ static void tipc_sk_timeout(struct time sock_put(sk); }
- static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq) + static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); - struct publication *publ; + struct tipc_socket_addr skaddr; + struct publication *p; u32 key;
- if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) return -EADDRINUSE; - - publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, - scope, tsk->portid, key); - if (unlikely(!publ)) + skaddr.ref = tsk->portid; + skaddr.node = tipc_own_addr(net); + p = tipc_nametbl_publish(net, ua, &skaddr, key); + if (unlikely(!p)) return -EINVAL;
- list_add(&publ->binding_sock, &tsk->publications); + list_add(&p->binding_sock, &tsk->publications); tsk->pub_count++; - tsk->published = 1; + tsk->published = true; return 0; }
- static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq) + static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct net *net = sock_net(&tsk->sk); - struct publication *publ; - struct publication *safe; + struct publication *safe, *p; + struct tipc_uaddr _ua; int rc = -EINVAL;
- if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - - list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { - if (seq) { - if (publ->scope != scope) - continue; - if (publ->type != seq->type) - continue; - if (publ->lower != seq->lower) - continue; - if (publ->upper != seq->upper) - break; - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); - rc = 0; - break; + list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) { + if (!ua) { + tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope, + p->sr.type, p->sr.lower, p->sr.upper); + tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key); + continue; } - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); + /* Unbind specific publication */ + if (p->scope != ua->scope) + continue; + if (p->sr.type != ua->sr.type) + continue; + if (p->sr.lower != ua->sr.lower) + continue; + if (p->sr.upper != ua->sr.upper) + break; + tipc_nametbl_withdraw(net, ua, &p->sk, p->key); rc = 0; + break; } - if (list_empty(&tsk->publications)) + if (list_empty(&tsk->publications)) { tsk->published = 0; + rc = 0; + } return rc; }
@@@ -3067,13 -3057,15 +3057,15 @@@ static int tipc_sk_join(struct tipc_soc struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_service_range seq; + struct tipc_uaddr ua; int rc;
if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; if (mreq->scope > TIPC_NODE_SCOPE) return -EINVAL; + if (mreq->scope != TIPC_NODE_SCOPE) + mreq->scope = TIPC_CLUSTER_SCOPE; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); @@@ -3083,11 -3075,10 +3075,10 @@@ msg_set_lookup_scope(hdr, mreq->scope); msg_set_nametype(hdr, mreq->type); msg_set_dest_droppable(hdr, true); - seq.type = mreq->type; - seq.lower = mreq->instance; - seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); - rc = tipc_sk_publish(tsk, mreq->scope, &seq); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope, + mreq->type, mreq->instance, mreq->instance); + tipc_nametbl_build_group(net, grp, &ua); + rc = tipc_sk_publish(tsk, &ua); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; @@@ -3104,15 -3095,17 +3095,17 @@@ static int tipc_sk_leave(struct tipc_so { struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; - struct tipc_service_range seq; + struct tipc_uaddr ua; int scope;
if (!grp) return -EINVAL; - tipc_group_self(grp, &seq, &scope); + ua.addrtype = TIPC_SERVICE_RANGE; + tipc_group_self(grp, &ua.sr, &scope); + ua.scope = scope; tipc_group_delete(net, grp); tsk->group = NULL; - tipc_sk_withdraw(tsk, scope, &seq); + tipc_sk_withdraw(tsk, &ua); return 0; }
@@@ -3711,11 -3704,11 +3704,11 @@@ static int __tipc_nl_add_sk_publ(struc
if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper)) goto attr_msg_cancel;
nla_nest_end(skb, attrs); @@@ -3863,9 -3856,9 +3856,9 @@@ bool tipc_sk_filtering(struct sock *sk p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); if (p) { - type = p->type; - lower = p->lower; - upper = p->upper; + type = p->sr.type; + lower = p->sr.lower; + upper = p->sr.upper; } }
@@@ -3964,9 -3957,9 +3957,9 @@@ int tipc_sk_dump(struct sock *sk, u16 d if (tsk->published) { p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0); } i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); diff --combined tools/lib/bpf/xsk.c index d24b5cc720ec,95da0e19f4a5..cea62cc3e456 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@@ -28,6 -28,7 +28,7 @@@ #include <sys/mman.h> #include <sys/socket.h> #include <sys/types.h> + #include <linux/if_link.h>
#include "bpf.h" #include "libbpf.h" @@@ -59,8 -60,6 +60,8 @@@ struct xsk_umem int fd; int refcount; struct list_head ctx_list; + bool rx_ring_setup_done; + bool tx_ring_setup_done; };
struct xsk_ctx { @@@ -72,8 -71,10 +73,10 @@@ int ifindex; struct list_head list; int prog_fd; + int link_fd; int xsks_map_fd; char ifname[IFNAMSIZ]; + bool has_bpf_link; };
struct xsk_socket { @@@ -411,7 -412,7 +414,7 @@@ static int xsk_load_xdp_prog(struct xsk static const int log_buf_size = 16 * 1024; struct xsk_ctx *ctx = xsk->ctx; char log_buf[log_buf_size]; - int err, prog_fd; + int prog_fd;
/* This is the fallback C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) @@@ -501,14 -502,41 +504,41 @@@ return prog_fd; }
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd, - xsk->config.xdp_flags); + ctx->prog_fd = prog_fd; + return 0; + } + + static int xsk_create_bpf_link(struct xsk_socket *xsk) + { + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int link_fd; + int err; + + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); if (err) { - close(prog_fd); + pr_warn("getting XDP prog id failed\n"); return err; }
- ctx->prog_fd = prog_fd; + /* if there's a netlink-based XDP prog loaded on interface, bail out + * and ask user to do the removal by himself + */ + if (prog_id) { + pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); + return -EINVAL; + } + + opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); + + link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); + if (link_fd < 0) { + pr_warn("bpf_link_create failed: %s\n", strerror(errno)); + return link_fd; + } + + ctx->link_fd = link_fd; return 0; }
@@@ -627,7 -655,6 +657,6 @@@ static int xsk_lookup_bpf_maps(struct x close(fd); }
- err = 0; if (ctx->xsks_map_fd == -1) err = -ENOENT;
@@@ -644,6 -671,98 +673,98 @@@ static int xsk_set_bpf_maps(struct xsk_ &xsk->fd, 0); }
+ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) + { + struct bpf_link_info link_info; + __u32 link_len; + __u32 id = 0; + int err; + int fd; + + while (true) { + err = bpf_link_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + pr_warn("can't get next link: %s\n", strerror(errno)); + break; + } + + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); + err = -errno; + break; + } + + link_len = sizeof(struct bpf_link_info); + memset(&link_info, 0, link_len); + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); + if (err) { + pr_warn("can't get link info: %s\n", strerror(errno)); + close(fd); + break; + } + if (link_info.type == BPF_LINK_TYPE_XDP) { + if (link_info.xdp.ifindex == ifindex) { + *link_fd = fd; + if (prog_id) + *prog_id = link_info.prog_id; + break; + } + } + close(fd); + } + + return err; + } + + static bool xsk_probe_bpf_link(void) + { + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, + .flags = XDP_FLAGS_SKB_MODE); + struct bpf_load_program_attr prog_attr; + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), + BPF_EXIT_INSN() + }; + int prog_fd, link_fd = -1; + int ifindex_lo = 1; + bool ret = false; + int err; + + err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); + if (err) + return ret; + + if (link_fd >= 0) + return true; + + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + if (prog_fd < 0) + return ret; + + link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); + close(prog_fd); + + if (link_fd >= 0) { + ret = true; + close(link_fd); + } + + return ret; + } + static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) { char ifname[IFNAMSIZ]; @@@ -665,64 
-784,108 +786,108 @@@ ctx->ifname[IFNAMSIZ - 1] = 0;
xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
return 0; }
- static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, - int *xsks_map_fd) + static int xsk_init_xdp_res(struct xsk_socket *xsk, + int *xsks_map_fd) { - struct xsk_socket *xsk = _xdp; struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; int err;
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, - xsk->config.xdp_flags); + err = xsk_create_bpf_maps(xsk); if (err) return err;
- if (!prog_id) { - err = xsk_create_bpf_maps(xsk); - if (err) - return err; + err = xsk_load_xdp_prog(xsk); + if (err) + goto err_load_xdp_prog;
- err = xsk_load_xdp_prog(xsk); - if (err) { - goto err_load_xdp_prog; - } - } else { - ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (ctx->prog_fd < 0) - return -errno; - err = xsk_lookup_bpf_maps(xsk); - if (err) { - close(ctx->prog_fd); - return err; - } - } + if (ctx->has_bpf_link) + err = xsk_create_bpf_link(xsk); + else + err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd, + xsk->config.xdp_flags);
- if (xsk->rx) { - err = xsk_set_bpf_maps(xsk); - if (err) { - if (!prog_id) { - goto err_set_bpf_maps; - } else { - close(ctx->prog_fd); - return err; - } - } - } - if (xsks_map_fd) - *xsks_map_fd = ctx->xsks_map_fd; + if (err) + goto err_attach_xdp_prog;
- return 0; + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_bpf_maps; + + return err;
err_set_bpf_maps: + if (ctx->has_bpf_link) + close(ctx->link_fd); + else + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); + err_attach_xdp_prog: close(ctx->prog_fd); - bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); err_load_xdp_prog: xsk_delete_bpf_maps(xsk); + return err; + } + + static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) + { + struct xsk_ctx *ctx = xsk->ctx; + int err; + + ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (ctx->prog_fd < 0) { + err = -errno; + goto err_prog_fd; + } + err = xsk_lookup_bpf_maps(xsk); + if (err) + goto err_lookup_maps; + + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_maps; + + return err; + + err_set_maps: + close(ctx->xsks_map_fd); + err_lookup_maps: + close(ctx->prog_fd); + err_prog_fd: + if (ctx->has_bpf_link) + close(ctx->link_fd); + return err; + } + + static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) + { + struct xsk_socket *xsk = _xdp; + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int err; + + if (ctx->has_bpf_link) + err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); + else + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); + + if (err) + return err; + + err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) : + xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); + + if (!err && xsks_map_fd) + *xsks_map_fd = ctx->xsks_map_fd;
return err; } @@@ -745,30 -908,26 +910,30 @@@ static struct xsk_ctx *xsk_get_ctx(stru return NULL; }
-static void xsk_put_ctx(struct xsk_ctx *ctx) +static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) { struct xsk_umem *umem = ctx->umem; struct xdp_mmap_offsets off; int err;
- if (--ctx->refcount == 0) { - err = xsk_get_mmap_offsets(umem->fd, &off); - if (!err) { - munmap(ctx->fill->ring - off.fr.desc, - off.fr.desc + umem->config.fill_size * - sizeof(__u64)); - munmap(ctx->comp->ring - off.cr.desc, - off.cr.desc + umem->config.comp_size * - sizeof(__u64)); - } + if (--ctx->refcount) + return;
- list_del(&ctx->list); - free(ctx); - } + if (!unmap) + goto out_free; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (err) + goto out_free; + + munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * + sizeof(__u64)); + munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * + sizeof(__u64)); + +out_free: + list_del(&ctx->list); + free(ctx); }
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, @@@ -803,6 -962,8 +968,6 @@@ memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); ctx->ifname[IFNAMSIZ - 1] = '\0';
- umem->fill_save = NULL; - umem->comp_save = NULL; ctx->fill = fill; ctx->comp = comp; list_add(&ctx->list, &umem->ctx_list); @@@ -858,8 -1019,6 +1023,8 @@@ int xsk_socket__create_shared(struct xs struct xsk_socket *xsk; struct xsk_ctx *ctx; int err, ifindex; + bool unmap = umem->fill_save != fill; + bool rx_setup_done = false, tx_setup_done = false;
if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; @@@ -887,8 -1046,6 +1052,8 @@@ } } else { xsk->fd = umem->fd; + rx_setup_done = umem->rx_ring_setup_done; + tx_setup_done = umem->tx_ring_setup_done; }
ctx = xsk_get_ctx(umem, ifindex, queue_id); @@@ -906,8 -1063,9 +1071,9 @@@ } } xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
- if (rx) { + if (rx && !rx_setup_done) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, &xsk->config.rx_size, sizeof(xsk->config.rx_size)); @@@ -915,10 -1073,8 +1081,10 @@@ err = -errno; goto out_put_ctx; } + if (xsk->fd == umem->fd) + umem->rx_ring_setup_done = true; } - if (tx) { + if (tx && !tx_setup_done) { err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, &xsk->config.tx_size, sizeof(xsk->config.tx_size)); @@@ -926,8 -1082,6 +1092,8 @@@ err = -errno; goto out_put_ctx; } + if (xsk->fd == umem->fd) + umem->rx_ring_setup_done = true; }
err = xsk_get_mmap_offsets(xsk->fd, &off); @@@ -1006,8 -1160,6 +1172,8 @@@ }
*xsk_ptr = xsk; + umem->fill_save = NULL; + umem->comp_save = NULL; return 0;
out_mmap_tx: @@@ -1019,7 -1171,7 +1185,7 @@@ out_mmap_rx munmap(rx_map, off.rx.desc + xsk->config.rx_size * sizeof(struct xdp_desc)); out_put_ctx: - xsk_put_ctx(ctx); + xsk_put_ctx(ctx, unmap); out_socket: if (--umem->refcount) close(xsk->fd); @@@ -1033,9 -1185,6 +1199,9 @@@ int xsk_socket__create(struct xsk_socke struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *usr_config) { + if (!umem) + return -EFAULT; + return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, rx, tx, umem->fill_save, umem->comp_save, usr_config); @@@ -1071,6 -1220,8 +1237,8 @@@ void xsk_socket__delete(struct xsk_sock if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); + if (ctx->has_bpf_link) + close(ctx->link_fd); }
err = xsk_get_mmap_offsets(xsk->fd, &off); @@@ -1085,7 -1236,7 +1253,7 @@@ } }
- xsk_put_ctx(ctx); + xsk_put_ctx(ctx, true);
umem->refcount--; /* Do not close an fd that also has an associated umem connected