The following commit has been merged in the master branch: commit 8859a44ea0df92bccdc942ef15781ebbfe0ad9f3 Merge: 6c5e6b4ccc1bb9ac56579a9aed25d517d2318be6 4e04e7513b0fa2fe8966a1c83fb473f1667e2810 Author: Jakub Kicinski kuba@kernel.org Date: Fri Apr 9 20:46:01 2021 -0700
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Conflicts:
MAINTAINERS - keep Chandrasekar drivers/net/ethernet/mellanox/mlx5/core/en_main.c - simple fix + trust the code re-added to param.c in -next is fine include/linux/bpf.h - trivial include/linux/ethtool.h - trivial, fix kdoc while at it include/linux/skmsg.h - move to relevant place in tcp.c, comment re-wrapped net/core/skmsg.c - add the sk = sk // sk = NULL around calls net/tipc/crypto.c - trivial
Signed-off-by: Jakub Kicinski kuba@kernel.org
diff --combined Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml index 2a3be0f9a1a1,13c26f23a820..2f46e45dcd60 --- a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml +++ b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml @@@ -22,25 -22,17 +22,25 @@@ properties maxItems: 1
interrupts: - description: RX interrupt + minItems: 1 + maxItems: 2 + items: + - description: RX interrupt + - description: TX interrupt
interrupt-names: - const: rx + minItems: 1 + maxItems: 2 + items: + - const: rx + - const: tx
required: - reg - interrupts - interrupt-names
- additionalProperties: false + unevaluatedProperties: false
examples: - | @@@ -51,7 -43,6 +51,7 @@@ compatible = "brcm,bcm4908-enet"; reg = <0x80002000 0x1000>;
- interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "rx"; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "rx", "tx"; }; diff --combined Documentation/networking/ethtool-netlink.rst index fd84f4ed898a,dc03ff884541..ce4a69f8308f --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@@ -208,8 -208,6 +208,8 @@@ Userspace to kernel ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info + ``ETHTOOL_MSG_FEC_GET`` get FEC settings + ``ETHTOOL_MSG_FEC_SET`` set FEC settings ===================================== ================================
Kernel to userspace: @@@ -244,8 -242,6 +244,8 @@@ ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info + ``ETHTOOL_MSG_FEC_GET_REPLY`` FEC settings + ``ETHTOOL_MSG_FEC_NTF`` FEC settings ===================================== =================================
``GET`` requests are sent by userspace applications to retrieve device @@@ -980,9 -976,9 +980,9 @@@ constraints on coalescing parameters an
PAUSE_GET - ============ + =========
- Gets channel counts like ``ETHTOOL_GPAUSE`` ioctl request. + Gets pause frame settings like ``ETHTOOL_GPAUSEPARAM`` ioctl request.
Request contents:
@@@ -1011,7 -1007,7 +1011,7 @@@ the statistics in the following structu Each member has a corresponding attribute defined.
PAUSE_SET - ============ + =========
Sets pause parameters like ``ETHTOOL_GPAUSEPARAM`` ioctl request.
@@@ -1028,7 -1024,7 +1028,7 @@@ Request contents EEE_GET =======
- Gets channel counts like ``ETHTOOL_GEEE`` ioctl request. + Gets Energy Efficient Ethernet settings like ``ETHTOOL_GEEE`` ioctl request.
Request contents:
@@@ -1058,7 -1054,7 +1058,7 @@@ first 32 are provided by the ``ethtool_ EEE_SET =======
- Sets pause parameters like ``ETHTOOL_GEEEPARAM`` ioctl request. + Sets Energy Efficient Ethernet parameters like ``ETHTOOL_SEEE`` ioctl request.
Request contents:
@@@ -1284,60 -1280,6 +1284,60 @@@ Kernel response contents For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that the table contains static entries, hard-coded by the NIC.
+FEC_GET +======= + +Gets FEC configuration and state like ``ETHTOOL_GFECPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ``ETHTOOL_A_FEC_ACTIVE`` u32 index of active FEC mode + ===================================== ====== ========================== + +``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently +active on the interface. This attribute may not be present if device does +not support FEC. + +``ETHTOOL_A_FEC_MODES`` and ``ETHTOOL_A_FEC_AUTO`` are only meaningful when +autonegotiation is disabled. If ``ETHTOOL_A_FEC_AUTO`` is non-zero driver will +select the FEC mode automatically based on the parameters of the SFP module. +This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface. +``ETHTOOL_A_FEC_MODES`` carry the current FEC configuration using link mode +bits (rather than old ``ETHTOOL_FEC_*`` bits). + +FEC_SET +======= + +Sets FEC parameters like ``ETHTOOL_SFECPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ===================================== ====== ========================== + +``FEC_SET`` is only meaningful when autonegotiation is disabled. Otherwise +FEC mode is selected as part of autonegotiation. + +``ETHTOOL_A_FEC_MODES`` selects which FEC mode should be used. It's recommended +to set only one bit, if multiple bits are set driver may choose between them +in an implementation specific way. + +``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP +module parameters. This does not mean autonegotiation. + Request translation ===================
@@@ -1431,9 -1373,9 +1431,9 @@@ are netlink only ``ETHTOOL_MSG_LINKMODES_SET`` ``ETHTOOL_PHY_GTUNABLE`` n/a ``ETHTOOL_PHY_STUNABLE`` n/a - ``ETHTOOL_GFECPARAM`` n/a - ``ETHTOOL_SFECPARAM`` n/a - n/a ''ETHTOOL_MSG_CABLE_TEST_ACT'' - n/a ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT'' + ``ETHTOOL_GFECPARAM`` ``ETHTOOL_MSG_FEC_GET`` + ``ETHTOOL_SFECPARAM`` ``ETHTOOL_MSG_FEC_SET`` + n/a ``ETHTOOL_MSG_CABLE_TEST_ACT`` + n/a ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` =================================== ===================================== diff --combined MAINTAINERS index 3ea9539821b5,ccd9228350cf..795b9941c151 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -1530,7 -1530,6 +1530,7 @@@ F: Documentation/devicetree/bindings/dm F: Documentation/devicetree/bindings/i2c/i2c-owl.yaml F: Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml F: Documentation/devicetree/bindings/mmc/owl-mmc.yaml +F: Documentation/devicetree/bindings/net/actions,owl-emac.yaml F: Documentation/devicetree/bindings/pinctrl/actions,* F: Documentation/devicetree/bindings/power/actions,owl-sps.txt F: Documentation/devicetree/bindings/timer/actions,owl-timer.txt @@@ -1543,7 -1542,6 +1543,7 @@@ F: drivers/dma/owl-dma. F: drivers/i2c/busses/i2c-owl.c F: drivers/irqchip/irq-owl-sirq.c F: drivers/mmc/host/owl-mmc.c +F: drivers/net/ethernet/actions/ F: drivers/pinctrl/actions/* F: drivers/soc/actions/ F: include/dt-bindings/power/owl-* @@@ -2491,7 -2489,7 +2491,7 @@@ N: sc27x N: sc2731
ARM/STI ARCHITECTURE - M: Patrice Chotard patrice.chotard@st.com + M: Patrice Chotard patrice.chotard@foss.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained W: http://www.stlinux.com @@@ -2524,7 -2522,7 +2524,7 @@@ F: include/linux/remoteproc/st_slim_rpr
ARM/STM32 ARCHITECTURE M: Maxime Coquelin mcoquelin.stm32@gmail.com - M: Alexandre Torgue alexandre.torgue@st.com + M: Alexandre Torgue alexandre.torgue@foss.st.com L: linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -3117,7 -3115,7 +3117,7 @@@ C: irc://irc.oftc.net/bcach F: drivers/md/bcache/
BDISP ST MEDIA DRIVER - M: Fabien Dessenne fabien.dessenne@st.com + M: Fabien Dessenne fabien.dessenne@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -3235,7 -3233,6 +3235,7 @@@ T: git git://git.kernel.org/pub/scm/lin T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git F: Documentation/bpf/ F: Documentation/networking/filter.rst +F: Documentation/userspace-api/ebpf/ F: arch/*/net/* F: include/linux/bpf* F: include/linux/filter.h @@@ -3250,7 -3247,6 +3250,7 @@@ F: net/core/filter. F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ +F: scripts/bpf_doc.py F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ @@@ -3679,7 -3675,7 +3679,7 @@@ M: bcm-kernel-feedback-list@broadcom.co L: linux-pm@vger.kernel.org S: Maintained T: git git://github.com/broadcom/stblinux.git - F: drivers/soc/bcm/bcm-pmb.c + F: drivers/soc/bcm/bcm63xx/bcm-pmb.c F: include/dt-bindings/soc/bcm-pmb.h
BROADCOM SPECIFIC AMBA DRIVER (BCMA) @@@ -5084,7 -5080,7 +5084,7 @@@ S: Maintaine F: drivers/platform/x86/dell/dell-wmi.c
DELTA ST MEDIA DRIVER - M: Hugues Fruchet hugues.fruchet@st.com + M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -5475,11 -5471,11 +5475,11 @@@ F: drivers/net/ethernet/freescale/dpaa2 F: drivers/net/ethernet/freescale/dpaa2/dpni*
DPAA2 ETHERNET SWITCH DRIVER -M: Ioana Radulescu ruxandra.radulescu@nxp.com M: Ioana Ciornei ioana.ciornei@nxp.com -L: linux-kernel@vger.kernel.org +L: netdev@vger.kernel.org S: Maintained -F: drivers/staging/fsl-dpaa2/ethsw +F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* +F: drivers/net/ethernet/freescale/dpaa2/dpsw*
DPT_I2O SCSI RAID DRIVER M: Adaptec OEM Raid Solutions aacraid@microsemi.com @@@ -6010,7 -6006,6 +6010,6 @@@ F: drivers/gpu/drm/rockchip
DRM DRIVERS FOR STI M: Benjamin Gaignard benjamin.gaignard@linaro.org - M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -6018,10 -6013,9 +6017,9 @@@ F: Documentation/devicetree/bindings/di F: drivers/gpu/drm/sti
DRM DRIVERS FOR STM - M: Yannick Fertre yannick.fertre@st.com - M: Philippe Cornu philippe.cornu@st.com + M: Yannick Fertre yannick.fertre@foss.st.com + M: Philippe Cornu philippe.cornu@foss.st.com M: Benjamin Gaignard benjamin.gaignard@linaro.org - M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -7480,8 -7474,9 +7478,9 @@@ F: include/uapi/asm-generic GENERIC PHY FRAMEWORK M: Kishon Vijay Abraham I kishon@ti.com M: Vinod Koul vkoul@kernel.org - L: linux-kernel@vger.kernel.org + L: linux-phy@lists.infradead.org S: Supported + Q: https://patchwork.kernel.org/project/linux-phy/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git F: Documentation/devicetree/bindings/phy/ F: drivers/phy/ @@@ -8234,7 -8229,7 +8233,7 @@@ F: include/linux/hugetlb. F: mm/hugetlb.c
HVA ST MEDIA DRIVER - M: Jean-Christophe Trotin jean-christophe.trotin@st.com + M: Jean-Christophe Trotin jean-christophe.trotin@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -10034,7 -10029,6 +10033,6 @@@ F: scripts/leaking_addresses.p
LED SUBSYSTEM M: Pavel Machek pavel@ucw.cz - R: Dan Murphy dmurphy@ti.com L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git @@@ -10695,7 -10689,6 +10693,7 @@@ F: include/linux/mv643xx.
MARVELL MV88X3310 PHY DRIVER M: Russell King linux@armlinux.org.uk +M: Marek Behun marek.behun@nic.cz L: netdev@vger.kernel.org S: Maintained F: drivers/net/phy/marvell10g.c @@@ -10911,7 -10904,7 +10909,7 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/radio/radio-maxiradio*
MCAN MMIO DEVICE DRIVER -M: Pankaj Sharma pankj.sharma@samsung.com +M: Chandrasekar Ramakrishnan rcsekar@samsung.com L: linux-can@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@@ -11171,7 -11164,7 +11169,7 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/dvb-frontends/stv6111*
MEDIA DRIVERS FOR STM32 - DCMI - M: Hugues Fruchet hugues.fruchet@st.com + M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported T: git git://linuxtv.org/media_tree.git @@@ -14857,6 -14850,14 +14855,14 @@@ L: linux-arm-msm@vger.kernel.or S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
+ QUALCOMM IPC ROUTER (QRTR) DRIVER + M: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org + L: linux-arm-msm@vger.kernel.org + S: Maintained + F: include/trace/events/qrtr.h + F: include/uapi/linux/qrtr.h + F: net/qrtr/ + QUALCOMM IPCC MAILBOX DRIVER M: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org L: linux-arm-msm@vger.kernel.org @@@ -15206,6 -15207,7 +15212,7 @@@ F: fs/reiserfs REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM M: Ohad Ben-Cohen ohad@wizery.com M: Bjorn Andersson bjorn.andersson@linaro.org + M: Mathieu Poirier mathieu.poirier@linaro.org L: linux-remoteproc@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rproc-next @@@ -15219,6 -15221,7 +15226,7 @@@ F: include/linux/remoteproc REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM M: Ohad Ben-Cohen ohad@wizery.com M: Bjorn Andersson bjorn.andersson@linaro.org + M: Mathieu Poirier mathieu.poirier@linaro.org L: linux-remoteproc@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rpmsg-next @@@ -15635,8 -15638,8 +15643,8 @@@ F: Documentation/s390/pci.rs
S390 VFIO AP DRIVER M: Tony Krowiak akrowiak@linux.ibm.com - M: Pierre Morel pmorel@linux.ibm.com M: Halil Pasic pasic@linux.ibm.com + M: Jason Herne jjherne@linux.ibm.com L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@@ -15648,6 -15651,7 +15656,7 @@@ F: drivers/s390/crypto/vfio_ap_private. S390 VFIO-CCW DRIVER M: Cornelia Huck cohuck@redhat.com M: Eric Farman farman@linux.ibm.com + M: Matthew Rosato mjrosato@linux.ibm.com R: Halil Pasic pasic@linux.ibm.com L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org @@@ -15658,6 -15662,7 +15667,7 @@@ F: include/uapi/linux/vfio_ccw.
S390 VFIO-PCI DRIVER M: Matthew Rosato mjrosato@linux.ibm.com + M: Eric Farman farman@linux.ibm.com L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org S: Supported @@@ -16944,7 -16949,8 +16954,8 @@@ F: Documentation/devicetree/bindings/me F: drivers/media/i2c/st-mipid02.c
ST STM32 I2C/SMBUS DRIVER - M: Pierre-Yves MORDRET pierre-yves.mordret@st.com + M: Pierre-Yves MORDRET pierre-yves.mordret@foss.st.com + M: Alain Volmat alain.volmat@foss.st.com L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-stm32* @@@ -17069,7 -17075,7 +17080,7 @@@ F: kernel/jump_label. F: kernel/static_call.c
STI AUDIO (ASoC) DRIVERS - M: Arnaud Pouliquen arnaud.pouliquen@st.com + M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt @@@ -17089,15 -17095,15 +17100,15 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/usb/stk1160/
STM32 AUDIO (ASoC) DRIVERS - M: Olivier Moysan olivier.moysan@st.com - M: Arnaud Pouliquen arnaud.pouliquen@st.com + M: Olivier Moysan olivier.moysan@foss.st.com + M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml F: sound/soc/stm/
STM32 TIMER/LPTIMER DRIVERS - M: Fabrice Gasnier fabrice.gasnier@st.com + M: Fabrice Gasnier fabrice.gasnier@foss.st.com S: Maintained F: Documentation/ABI/testing/*timer-stm32 F: Documentation/devicetree/bindings/*/*stm32-*timer* @@@ -17107,7 -17113,7 +17118,7 @@@ F: include/linux/*/stm32-*tim
STMMAC ETHERNET DRIVER M: Giuseppe Cavallaro peppe.cavallaro@st.com - M: Alexandre Torgue alexandre.torgue@st.com + M: Alexandre Torgue alexandre.torgue@foss.st.com M: Jose Abreu joabreu@synopsys.com L: netdev@vger.kernel.org S: Supported @@@ -17849,7 -17855,6 +17860,6 @@@ S: Maintaine F: drivers/thermal/ti-soc-thermal/
TI BQ27XXX POWER SUPPLY DRIVER - R: Dan Murphy dmurphy@ti.com F: drivers/power/supply/bq27xxx_battery.c F: drivers/power/supply/bq27xxx_battery_i2c.c F: include/linux/power/bq27xxx_battery.h @@@ -17983,6 -17988,12 +17993,6 @@@ L: alsa-devel@alsa-project.org (moderat S: Odd Fixes F: sound/soc/codecs/tas571x*
-TI TCAN4X5X DEVICE DRIVER -L: linux-can@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/net/can/tcan4x5x.txt -F: drivers/net/can/m_can/tcan4x5x* - TI TRF7970A NFC DRIVER M: Mark Greer mgreer@animalcreek.com L: linux-wireless@vger.kernel.org diff --combined arch/x86/net/bpf_jit_comp.c index 9eead60f0301,7f1b3a862e14..1a467b2a5467 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@@ -1689,7 -1689,16 +1689,16 @@@ emit_jmp }
if (image) { - if (unlikely(proglen + ilen > oldproglen)) { + /* + * When populating the image, assert that: + * + * i) We do not write beyond the allocated space, and + * ii) addrs[i] did not change from the prior run, in order + * to validate assumptions made for computing branch + * displacements. + */ + if (unlikely(proglen + ilen > oldproglen || + proglen + ilen != addrs[i])) { pr_err("bpf_jit: fatal error\n"); return -EFAULT; } @@@ -2346,8 -2355,3 +2355,8 @@@ out tmp : orig_prog); return prog; } + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} diff --combined arch/x86/net/bpf_jit_comp32.c index 0a7a2870f111,6a99def7d315..3da88ded6ee3 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@@ -1390,19 -1390,6 +1390,19 @@@ static inline void emit_push_r64(const *pprog = prog; }
+static void emit_push_r32(const u8 src[], u8 **pprog) +{ + u8 *prog = *pprog; + int cnt = 0; + + /* mov ecx,dword ptr [ebp+off] */ + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); + /* push ecx */ + EMIT1(0x51); + + *pprog = prog; +} + static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo) { u8 jmp_cond; @@@ -1472,174 -1459,6 +1472,174 @@@ return jmp_cond; }
+/* i386 kernel compiles with "-mregparm=3". From gcc document: + * + * ==== snippet ==== + * regparm (number) + * On x86-32 targets, the regparm attribute causes the compiler + * to pass arguments number one to (number) if they are of integral + * type in registers EAX, EDX, and ECX instead of on the stack. + * Functions that take a variable number of arguments continue + * to be passed all of their arguments on the stack. + * ==== snippet ==== + * + * The first three args of a function will be considered for + * putting into the 32bit register EAX, EDX, and ECX. + * + * Two 32bit registers are used to pass a 64bit arg. + * + * For example, + * void foo(u32 a, u32 b, u32 c, u32 d): + * u32 a: EAX + * u32 b: EDX + * u32 c: ECX + * u32 d: stack + * + * void foo(u64 a, u32 b, u32 c): + * u64 a: EAX (lo32) EDX (hi32) + * u32 b: ECX + * u32 c: stack + * + * void foo(u32 a, u64 b, u32 c): + * u32 a: EAX + * u64 b: EDX (lo32) ECX (hi32) + * u32 c: stack + * + * void foo(u32 a, u32 b, u64 c): + * u32 a: EAX + * u32 b: EDX + * u64 c: stack + * + * The return value will be stored in the EAX (and EDX for 64bit value). + * + * For example, + * u32 foo(u32 a, u32 b, u32 c): + * return value: EAX + * + * u64 foo(u32 a, u32 b, u32 c): + * return value: EAX (lo32) EDX (hi32) + * + * Notes: + * The verifier only accepts function having integer and pointers + * as its args and return value, so it does not have + * struct-by-value. + * + * emit_kfunc_call() finds out the btf_func_model by calling + * bpf_jit_find_kfunc_model(). A btf_func_model + * has the details about the number of args, size of each arg, + * and the size of the return value. + * + * It first decides how many args can be passed by EAX, EDX, and ECX. + * That will decide what args should be pushed to the stack: + * [first_stack_regno, last_stack_regno] are the bpf regnos + * that should be pushed to the stack. + * + * It will first push all args to the stack because the push + * will need to use ECX. Then, it moves + * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX. + * + * When emitting a call (0xE8), it needs to figure out + * the jmp_offset relative to the jit-insn address immediately + * following the call (0xE8) instruction. At this point, it knows + * the end of the jit-insn address after completely translated the + * current (BPF_JMP | BPF_CALL) bpf-insn. It is passed as "end_addr" + * to the emit_kfunc_call(). Thus, it can learn the "immediate-follow-call" + * address by figuring out how many jit-insn is generated between + * the call (0xE8) and the end_addr: + * - 0-1 jit-insn (3 bytes each) to restore the esp pointer if there + * is arg pushed to the stack. + * - 0-2 jit-insns (3 bytes each) to handle the return value. + */ +static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr, + const struct bpf_insn *insn, u8 **pprog) +{ + const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX }; + int i, cnt = 0, first_stack_regno, last_stack_regno; + int free_arg_regs = ARRAY_SIZE(arg_regs); + const struct btf_func_model *fm; + int bytes_in_stack = 0; + const u8 *cur_arg_reg; + u8 *prog = *pprog; + s64 jmp_offset; + + fm = bpf_jit_find_kfunc_model(bpf_prog, insn); + if (!fm) + return -EINVAL; + + first_stack_regno = BPF_REG_1; + for (i = 0; i < fm->nr_args; i++) { + int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1; + + if (regs_needed > free_arg_regs) + break; + + free_arg_regs -= regs_needed; + first_stack_regno++; + } + + /* Push the args to the stack */ + last_stack_regno = BPF_REG_0 + fm->nr_args; + for (i = last_stack_regno; i >= first_stack_regno; i--) { + if (fm->arg_size[i - 1] > sizeof(u32)) { + emit_push_r64(bpf2ia32[i], &prog); + bytes_in_stack += 8; + } else { + emit_push_r32(bpf2ia32[i], &prog); + bytes_in_stack += 4; + } + } + + cur_arg_reg = &arg_regs[0]; + for (i = BPF_REG_1; i < first_stack_regno; i++) { + /* mov e[adc]x,dword ptr [ebp+off] */ + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++), + STACK_VAR(bpf2ia32[i][0])); + if (fm->arg_size[i - 1] > sizeof(u32)) + /* mov e[adc]x,dword ptr [ebp+off] */ + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++), + STACK_VAR(bpf2ia32[i][1])); + } + + if (bytes_in_stack) + /* add esp,"bytes_in_stack" */ + end_addr -= 3; + + /* mov dword ptr [ebp+off],edx */ + if (fm->ret_size > sizeof(u32)) + end_addr -= 3; + + /* mov dword ptr [ebp+off],eax */ + if (fm->ret_size) + end_addr -= 3; + + jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr; + if (!is_simm32(jmp_offset)) { + pr_err("unsupported BPF kernel function jmp_offset:%lld\n", + jmp_offset); + return -EINVAL; + } + + EMIT1_off32(0xE8, jmp_offset); + + if (fm->ret_size) + /* mov dword ptr [ebp+off],eax */ + EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(bpf2ia32[BPF_REG_0][0])); + + if (fm->ret_size > sizeof(u32)) + /* mov dword ptr [ebp+off],edx */ + EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), + STACK_VAR(bpf2ia32[BPF_REG_0][1])); + + if (bytes_in_stack) + /* add esp,"bytes_in_stack" */ + EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack); + + *pprog = prog; + + return 0; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@@ -2069,18 -1888,6 +2069,18 @@@ if (insn->src_reg == BPF_PSEUDO_CALL) goto notyet;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + int err; + + err = emit_kfunc_call(bpf_prog, + image + addrs[i], + insn, &prog); + + if (err) + return err; + break; + } + func = (u8 *) __bpf_call_base + imm32; jmp_offset = func - (image + addrs[i]);
@@@ -2469,7 -2276,16 +2469,16 @@@ notyet }
if (image) { - if (unlikely(proglen + ilen > oldproglen)) { + /* + * When populating the image, assert that: + * + * i) We do not write beyond the allocated space, and + * ii) addrs[i] did not change from the prior run, in order + * to validate assumptions made for computing branch + * displacements. + */ + if (unlikely(proglen + ilen > oldproglen || + proglen + ilen != addrs[i])) { pr_err("bpf_jit: fatal error\n"); return -EFAULT; } @@@ -2586,8 -2402,3 +2595,8 @@@ out tmp : orig_prog); return prog; } + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} diff --combined drivers/bluetooth/btusb.c index 192cb8c191bc,5cbfbd948f67..5d603ef39bad --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@@ -399,9 -399,7 +399,9 @@@ static const struct usb_device_id black
/* MediaTek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0e8d, 0xe0, 0x01, 0x01), - .driver_info = BTUSB_MEDIATEK }, + .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES },
/* Additional MediaTek MT7615E Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK}, @@@ -457,8 -455,6 +457,8 @@@ BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xc123), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0cb5, 0xc547), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH },
/* Silicon Wave based devices */ { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE }, @@@ -2404,7 -2400,7 +2404,7 @@@ static int btusb_send_frame_intel(struc return -EILSEQ; }
-static bool btusb_setup_intel_new_get_fw_name(struct intel_version *ver, +static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver, struct intel_boot_params *params, char *fw_name, size_t len, const char *suffix) @@@ -2428,10 -2424,9 +2428,10 @@@ suffix); break; default: - return false; + return -EINVAL; } - return true; + + return 0; }
static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv *ver_tlv, @@@ -2449,44 -2444,6 +2449,44 @@@ suffix); }
+static int btusb_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec) +{ + struct btusb_data *data = hci_get_drvdata(hdev); + ktime_t delta, rettime; + unsigned long long duration; + int err; + + set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); + + bt_dev_info(hdev, "Waiting for firmware download to complete"); + + err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(msec)); + if (err == -EINTR) { + bt_dev_err(hdev, "Firmware loading interrupted"); + return err; + } + + if (err) { + bt_dev_err(hdev, "Firmware loading timeout"); + return -ETIMEDOUT; + } + + if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) { + bt_dev_err(hdev, "Firmware loading failed"); + return -ENOEXEC; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long)ktime_to_ns(delta) >> 10; + + bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration); + + return 0; +} + static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev, struct intel_version_tlv *ver, u32 *boot_param) @@@ -2495,11 -2452,19 +2495,11 @@@ char fwname[64]; int err; struct btusb_data *data = hci_get_drvdata(hdev); + ktime_t calltime;
if (!ver || !boot_param) return -EINVAL;
- /* The hardware platform number has a fixed value of 0x37 and - * for now only accept this single value. - */ - if (INTEL_HW_PLATFORM(ver->cnvi_bt) != 0x37) { - bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)", - INTEL_HW_PLATFORM(ver->cnvi_bt)); - return -EINVAL; - } - /* The firmware variant determines if the device is in bootloader * mode or is running operational firmware. The value 0x03 identifies * the bootloader and the value 0x23 identifies the operational @@@ -2516,6 -2481,50 +2516,6 @@@ if (ver->img_type == 0x03) { clear_bit(BTUSB_BOOTLOADER, &data->flags); btintel_check_bdaddr(hdev); - return 0; - } - - /* Check for supported iBT hardware variants of this firmware - * loading method. - * - * This check has been put in place to ensure correct forward - * compatibility options when newer hardware variants come along. - */ - switch (INTEL_HW_VARIANT(ver->cnvi_bt)) { - case 0x17: /* TyP */ - case 0x18: /* Slr */ - case 0x19: /* Slr-F */ - break; - default: - bt_dev_err(hdev, "Unsupported Intel hardware variant (0x%x)", - INTEL_HW_VARIANT(ver->cnvi_bt)); - return -EINVAL; - } - - /* If the device is not in bootloader mode, then the only possible - * choice is to return an error and abort the device initialization. - */ - if (ver->img_type != 0x01) { - bt_dev_err(hdev, "Unsupported Intel firmware variant (0x%x)", - ver->img_type); - return -ENODEV; - } - - /* It is required that every single firmware fragment is acknowledged - * with a command complete event. If the boot parameters indicate - * that this bootloader does not send them, then abort the setup. - */ - if (ver->limited_cce != 0x00) { - bt_dev_err(hdev, "Unsupported Intel firmware loading method (0x%x)", - ver->limited_cce); - return -EINVAL; - } - - /* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */ - if (ver->sbe_type > 0x01) { - bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)", - ver->sbe_type); - return -EINVAL; }
/* If the OTP has no valid Bluetooth device address, then there will @@@ -2529,8 -2538,7 +2529,8 @@@ btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi"); err = request_firmware(&fw, fwname, &hdev->dev); if (err < 0) { - bt_dev_err(hdev, "Failed to load Intel firmware file (%d)", err); + bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", + fwname, err); return err; }
@@@ -2543,28 -2551,22 +2543,28 @@@ goto done; }
+ calltime = ktime_get(); + set_bit(BTUSB_DOWNLOADING, &data->flags);
/* Start firmware downloading and get boot parameter */ - err = btintel_download_firmware_newgen(hdev, fw, boot_param, + err = btintel_download_firmware_newgen(hdev, ver, fw, boot_param, INTEL_HW_VARIANT(ver->cnvi_bt), ver->sbe_type); if (err < 0) { + if (err == -EALREADY) { + /* Firmware has already been loaded */ + set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); + err = 0; + goto done; + } + /* When FW download fails, send Intel Reset to retry * FW download. */ btintel_reset_to_bootloader(hdev); goto done; } - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - - bt_dev_info(hdev, "Waiting for firmware download to complete");
/* Before switching the device into operational mode and with that * booting the loaded firmware, wait for the bootloader notification @@@ -2577,9 -2579,26 +2577,9 @@@ * and thus just timeout if that happens and fail the setup * of this device. */ - err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, - TASK_INTERRUPTIBLE, - msecs_to_jiffies(5000)); - if (err == -EINTR) { - bt_dev_err(hdev, "Firmware loading interrupted"); - goto done; - } - - if (err) { - bt_dev_err(hdev, "Firmware loading timeout"); - err = -ETIMEDOUT; + err = btusb_download_wait(hdev, calltime, 5000); + if (err == -ETIMEDOUT) btintel_reset_to_bootloader(hdev); - goto done; - } - - if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) { - bt_dev_err(hdev, "Firmware loading failed"); - err = -ENOEXEC; - goto done; - }
done: release_firmware(fw); @@@ -2595,11 -2614,41 +2595,11 @@@ static int btusb_intel_download_firmwar char fwname[64]; int err; struct btusb_data *data = hci_get_drvdata(hdev); + ktime_t calltime;
if (!ver || !params) return -EINVAL;
- /* The hardware platform number has a fixed value of 0x37 and - * for now only accept this single value. - */ - if (ver->hw_platform != 0x37) { - bt_dev_err(hdev, "Unsupported Intel hardware platform (%u)", - ver->hw_platform); - return -EINVAL; - } - - /* Check for supported iBT hardware variants of this firmware - * loading method. - * - * This check has been put in place to ensure correct forward - * compatibility options when newer hardware variants come along. - */ - switch (ver->hw_variant) { - case 0x0b: /* SfP */ - case 0x0c: /* WsP */ - case 0x11: /* JfP */ - case 0x12: /* ThP */ - case 0x13: /* HrP */ - case 0x14: /* CcP */ - break; - default: - bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)", - ver->hw_variant); - return -EINVAL; - } - - btintel_version_info(hdev, ver); - /* The firmware variant determines if the device is in bootloader * mode or is running operational firmware. The value 0x06 identifies * the bootloader and the value 0x23 identifies the operational @@@ -2616,18 -2665,16 +2616,18 @@@ if (ver->fw_variant == 0x23) { clear_bit(BTUSB_BOOTLOADER, &data->flags); btintel_check_bdaddr(hdev); - return 0; - }
- /* If the device is not in bootloader mode, then the only possible - * choice is to return an error and abort the device initialization. - */ - if (ver->fw_variant != 0x06) { - bt_dev_err(hdev, "Unsupported Intel firmware variant (%u)", - ver->fw_variant); - return -ENODEV; + /* SfP and WsP don't seem to update the firmware version on file + * so version checking is currently possible. + */ + switch (ver->hw_variant) { + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + return 0; + } + + /* Proceed to download to check if the version matches */ + goto download; }
/* Read the secure boot parameters to identify the operating @@@ -2655,7 -2702,6 +2655,7 @@@ set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); }
+download: /* With this Intel bootloader only the hardware variant and device * revision information are used to select the right firmware for SfP * and WsP. @@@ -2679,15 -2725,14 +2679,15 @@@ */ err = btusb_setup_intel_new_get_fw_name(ver, params, fwname, sizeof(fwname), "sfi"); - if (!err) { + if (err < 0) { bt_dev_err(hdev, "Unsupported Intel firmware naming"); return -EINVAL; }
err = request_firmware(&fw, fwname, &hdev->dev); if (err < 0) { - bt_dev_err(hdev, "Failed to load Intel firmware file (%d)", err); + bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", + fwname, err); return err; }
@@@ -2700,26 -2745,20 +2700,26 @@@ goto done; }
+ calltime = ktime_get(); + set_bit(BTUSB_DOWNLOADING, &data->flags);
/* Start firmware downloading and get boot parameter */ - err = btintel_download_firmware(hdev, fw, boot_param); + err = btintel_download_firmware(hdev, ver, fw, boot_param); if (err < 0) { + if (err == -EALREADY) { + /* Firmware has already been loaded */ + set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); + err = 0; + goto done; + } + /* When FW download fails, send Intel Reset to retry * FW download. */ btintel_reset_to_bootloader(hdev); goto done; } - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - - bt_dev_info(hdev, "Waiting for firmware download to complete");
/* Before switching the device into operational mode and with that * booting the loaded firmware, wait for the bootloader notification @@@ -2732,74 -2771,29 +2732,74 @@@ * and thus just timeout if that happens and fail the setup * of this device. */ - err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, + err = btusb_download_wait(hdev, calltime, 5000); + if (err == -ETIMEDOUT) + btintel_reset_to_bootloader(hdev); + +done: + release_firmware(fw); + return err; +} + +static int btusb_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec) +{ + struct btusb_data *data = hci_get_drvdata(hdev); + ktime_t delta, rettime; + unsigned long long duration; + int err; + + bt_dev_info(hdev, "Waiting for device to boot"); + + err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING, TASK_INTERRUPTIBLE, - msecs_to_jiffies(5000)); + msecs_to_jiffies(msec)); if (err == -EINTR) { - bt_dev_err(hdev, "Firmware loading interrupted"); - goto done; + bt_dev_err(hdev, "Device boot interrupted"); + return -EINTR; }
if (err) { - bt_dev_err(hdev, "Firmware loading timeout"); - err = -ETIMEDOUT; - btintel_reset_to_bootloader(hdev); - goto done; + bt_dev_err(hdev, "Device boot timeout"); + return -ETIMEDOUT; }
- if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) { - bt_dev_err(hdev, "Firmware loading failed"); - err = -ENOEXEC; - goto done; + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + bt_dev_info(hdev, "Device booted in %llu usecs", duration); + + return 0; +} + +static int btusb_intel_boot(struct hci_dev *hdev, u32 boot_addr) +{ + struct btusb_data *data = hci_get_drvdata(hdev); + ktime_t calltime; + int err; + + calltime = ktime_get(); + + set_bit(BTUSB_BOOTING, &data->flags); + + err = btintel_send_intel_reset(hdev, boot_addr); + if (err) { + bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err); + btintel_reset_to_bootloader(hdev); + return err; }
-done: - release_firmware(fw); + /* The bootloader will not indicate when the device is ready. This + * is done by the operational firmware sending bootup notification. + * + * Booting into operational firmware should not take longer than + * 1 second. However if that happens, then just fail the setup + * since something went wrong. + */ + err = btusb_boot_wait(hdev, calltime, 1000); + if (err == -ETIMEDOUT) + btintel_reset_to_bootloader(hdev); + return err; }
@@@ -2810,6 -2804,8 +2810,6 @@@ static int btusb_setup_intel_new(struc struct intel_boot_params params; u32 boot_param; char ddcname[64]; - ktime_t calltime, delta, rettime; - unsigned long long duration; int err; struct intel_debug_features features;
@@@ -2821,6 -2817,8 +2821,6 @@@ */ boot_param = 0x00000000;
- calltime = ktime_get(); - /* Read the Intel version information to determine if the device * is in bootloader mode or if it already has operational firmware * loaded. @@@ -2832,10 -2830,6 +2832,10 @@@ return err; }
+ err = btintel_version_info(hdev, &ver); + if (err) + return err; + err = btusb_intel_download_firmware(hdev, &ver, ¶ms, &boot_param); if (err) return err; @@@ -2844,16 -2838,59 +2844,16 @@@ if (ver.fw_variant == 0x23) goto finish;
- rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long) ktime_to_ns(delta) >> 10; - - bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration); - - calltime = ktime_get(); - - set_bit(BTUSB_BOOTING, &data->flags); - - err = btintel_send_intel_reset(hdev, boot_param); - if (err) { - bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err); - btintel_reset_to_bootloader(hdev); + err = btusb_intel_boot(hdev, boot_param); + if (err) return err; - } - - /* The bootloader will not indicate when the device is ready. This - * is done by the operational firmware sending bootup notification. - * - * Booting into operational firmware should not take longer than - * 1 second. However if that happens, then just fail the setup - * since something went wrong. - */ - bt_dev_info(hdev, "Waiting for device to boot"); - - err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING, - TASK_INTERRUPTIBLE, - msecs_to_jiffies(1000)); - - if (err == -EINTR) { - bt_dev_err(hdev, "Device boot interrupted"); - return -EINTR; - } - - if (err) { - bt_dev_err(hdev, "Device boot timeout"); - btintel_reset_to_bootloader(hdev); - return -ETIMEDOUT; - } - - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long) ktime_to_ns(delta) >> 10; - - bt_dev_info(hdev, "Device booted in %llu usecs", duration);
clear_bit(BTUSB_BOOTLOADER, &data->flags);
err = btusb_setup_intel_new_get_fw_name(&ver, ¶ms, ddcname, sizeof(ddcname), "ddc");
- if (!err) { + if (err < 0) { bt_dev_err(hdev, "Unsupported Intel firmware naming"); } else { /* Once the device is running in operational mode, it needs to @@@ -2910,6 -2947,8 +2910,6 @@@ static int btusb_setup_intel_newgen(str struct btusb_data *data = hci_get_drvdata(hdev); u32 boot_param; char ddcname[64]; - ktime_t calltime, delta, rettime; - unsigned long long duration; int err; struct intel_debug_features features; struct intel_version_tlv version; @@@ -2922,6 -2961,8 +2922,6 @@@ */ boot_param = 0x00000000;
- calltime = ktime_get(); - /* Read the Intel version information to determine if the device * is in bootloader mode or if it already has operational firmware * loaded. @@@ -2933,9 -2974,7 +2933,9 @@@ return err; }
- btintel_version_info_tlv(hdev, &version); + err = btintel_version_info_tlv(hdev, &version); + if (err) + return err;
err = btusb_intel_download_firmware_newgen(hdev, &version, &boot_param); if (err) @@@ -2945,9 -2984,52 +2945,9 @@@ if (version.img_type == 0x03) goto finish;
- rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long)ktime_to_ns(delta) >> 10; - - bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration); - - calltime = ktime_get(); - - set_bit(BTUSB_BOOTING, &data->flags); - - err = btintel_send_intel_reset(hdev, boot_param); - if (err) { - bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err); - btintel_reset_to_bootloader(hdev); + err = btusb_intel_boot(hdev, boot_param); + if (err) return err; - } - - /* The bootloader will not indicate when the device is ready. This - * is done by the operational firmware sending bootup notification. - * - * Booting into operational firmware should not take longer than - * 1 second. However if that happens, then just fail the setup - * since something went wrong. - */ - bt_dev_info(hdev, "Waiting for device to boot"); - - err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING, - TASK_INTERRUPTIBLE, - msecs_to_jiffies(1000)); - - if (err == -EINTR) { - bt_dev_err(hdev, "Device boot interrupted"); - return -EINTR; - } - - if (err) { - bt_dev_err(hdev, "Device boot timeout"); - btintel_reset_to_bootloader(hdev); - return -ETIMEDOUT; - } - - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long)ktime_to_ns(delta) >> 10; - - bt_dev_info(hdev, "Device booted in %llu usecs", duration);
clear_bit(BTUSB_BOOTLOADER, &data->flags);
@@@ -3413,7 -3495,7 +3413,7 @@@ static int btusb_mtk_setup_firmware_79x fw_ptr = fw->data; fw_bin_ptr = fw_ptr; globaldesc = (struct btmtk_global_desc *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE); - section_num = globaldesc->section_num; + section_num = le32_to_cpu(globaldesc->section_num);
for (i = 0; i < section_num; i++) { first_block = 1; @@@ -3421,8 -3503,8 +3421,8 @@@ sectionmap = (struct btmtk_section_map *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE + MTK_FW_ROM_PATCH_GD_SIZE + MTK_FW_ROM_PATCH_SEC_MAP_SIZE * i);
- section_offset = sectionmap->secoffset; - dl_size = sectionmap->bin_info_spec.dlsize; + section_offset = le32_to_cpu(sectionmap->secoffset); + dl_size = le32_to_cpu(sectionmap->bin_info_spec.dlsize);
if (dl_size > 0) { retry = 20; @@@ -3658,7 -3740,7 +3658,7 @@@ static int btusb_mtk_setup(struct hci_d int err, status; u32 dev_id; char fw_bin_name[64]; - u32 fw_version; + u32 fw_version = 0; u8 param;
calltime = ktime_get(); @@@ -4767,8 -4849,8 +4767,8 @@@ static int btusb_probe(struct usb_inter data->diag = NULL; }
- if (!enable_autosuspend) - usb_disable_autosuspend(data->udev); + if (enable_autosuspend) + usb_enable_autosuspend(data->udev);
err = hci_register_dev(hdev); if (err < 0) @@@ -4828,9 -4910,6 +4828,6 @@@ static void btusb_disconnect(struct usb gpiod_put(data->reset_gpio);
hci_free_dev(hdev); - - if (!enable_autosuspend) - usb_enable_autosuspend(data->udev); }
#ifdef CONFIG_PM diff --combined drivers/net/can/spi/mcp251x.c index 80ab1593ca31,a57da43680d8..492f1bcb0516 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@@ -276,7 -276,7 +276,7 @@@ static void mcp251x_clean(struct net_de net->stats.tx_errors++; dev_kfree_skb(priv->tx_skb); if (priv->tx_len) - can_free_echo_skb(priv->net, 0); + can_free_echo_skb(priv->net, 0, NULL); priv->tx_skb = NULL; priv->tx_len = 0; } @@@ -314,6 -314,18 +314,18 @@@ static int mcp251x_spi_trans(struct spi return ret; }
+ static int mcp251x_spi_write(struct spi_device *spi, int len) + { + struct mcp251x_priv *priv = spi_get_drvdata(spi); + int ret; + + ret = spi_write(spi, priv->spi_tx_buf, len); + if (ret) + dev_err(&spi->dev, "spi write failed: ret = %d\n", ret); + + return ret; + } + static u8 mcp251x_read_reg(struct spi_device *spi, u8 reg) { struct mcp251x_priv *priv = spi_get_drvdata(spi); @@@ -361,7 -373,7 +373,7 @@@ static void mcp251x_write_reg(struct sp priv->spi_tx_buf[1] = reg; priv->spi_tx_buf[2] = val;
- mcp251x_spi_trans(spi, 3); + mcp251x_spi_write(spi, 3); }
static void mcp251x_write_2regs(struct spi_device *spi, u8 reg, u8 v1, u8 v2) @@@ -373,7 -385,7 +385,7 @@@ priv->spi_tx_buf[2] = v1; priv->spi_tx_buf[3] = v2;
- mcp251x_spi_trans(spi, 4); + mcp251x_spi_write(spi, 4); }
static void mcp251x_write_bits(struct spi_device *spi, u8 reg, @@@ -386,7 -398,7 +398,7 @@@ priv->spi_tx_buf[2] = mask; priv->spi_tx_buf[3] = val;
- mcp251x_spi_trans(spi, 4); + mcp251x_spi_write(spi, 4); }
static u8 mcp251x_read_stat(struct spi_device *spi) @@@ -618,7 -630,7 +630,7 @@@ static void mcp251x_hw_tx_frame(struct buf[i]); } else { memcpy(priv->spi_tx_buf, buf, TXBDAT_OFF + len); - mcp251x_spi_trans(spi, TXBDAT_OFF + len); + mcp251x_spi_write(spi, TXBDAT_OFF + len); } }
@@@ -650,7 -662,7 +662,7 @@@ static void mcp251x_hw_tx(struct spi_de
/* use INSTRUCTION_RTS, to avoid "repeated frame problem" */ priv->spi_tx_buf[0] = INSTRUCTION_RTS(1 << tx_buf_idx); - mcp251x_spi_trans(priv->spi, 1); + mcp251x_spi_write(priv->spi, 1); }
static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, @@@ -888,7 -900,7 +900,7 @@@ static int mcp251x_hw_reset(struct spi_ mdelay(MCP251X_OST_DELAY_MS);
priv->spi_tx_buf[0] = INSTRUCTION_RESET; - ret = mcp251x_spi_trans(spi, 1); + ret = mcp251x_spi_write(spi, 1); if (ret) return ret;
diff --combined drivers/net/can/usb/peak_usb/pcan_usb_core.c index ad006edf474d,28e916a04047..e69b005be068 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@@ -14,7 -14,6 +14,7 @@@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/usb.h> +#include <linux/ethtool.h>
#include <linux/can.h> #include <linux/can/dev.h> @@@ -372,7 -371,7 +372,7 @@@ static netdev_tx_t peak_usb_ndo_start_x
err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL);
usb_unanchor_urb(urb);
@@@ -821,9 -820,6 +821,9 @@@ static int peak_usb_create_dev(const st
netdev->flags |= IFF_ECHO; /* we support local echo */
+ /* add ethtool support */ + netdev->ethtool_ops = peak_usb_adapter->ethtool_ops; + init_usb_anchor(&dev->rx_submitted);
init_usb_anchor(&dev->tx_submitted); @@@ -861,7 -857,7 +861,7 @@@ if (dev->adapter->dev_set_bus) { err = dev->adapter->dev_set_bus(dev, 0); if (err) - goto lbl_unregister_candev; + goto adap_dev_free; }
/* get device number early */ @@@ -873,6 -869,10 +873,10 @@@
return 0;
+ adap_dev_free: + if (dev->adapter->dev_free) + dev->adapter->dev_free(dev); + lbl_unregister_candev: unregister_candev(netdev);
diff --combined drivers/net/dsa/lantiq_gswip.c index 26d0e3bb5dea,bf5c62e5c0b0..314ae78bbdd6 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@@ -1,6 -1,6 +1,6 @@@ // SPDX-License-Identifier: GPL-2.0 /* - * Lantiq / Intel GSWIP switch driver for VRX200 SoCs + * Lantiq / Intel GSWIP switch driver for VRX200, xRX300 and xRX330 SoCs * * Copyright (C) 2010 Lantiq Deutschland * Copyright (C) 2012 John Crispin john@phrozen.org @@@ -93,14 -93,17 +93,18 @@@
/* GSWIP MII Registers */ #define GSWIP_MII_CFGp(p) (0x2 * (p)) + #define GSWIP_MII_CFG_RESET BIT(15) #define GSWIP_MII_CFG_EN BIT(14) + #define GSWIP_MII_CFG_ISOLATE BIT(13) #define GSWIP_MII_CFG_LDCLKDIS BIT(12) + #define GSWIP_MII_CFG_RGMII_IBS BIT(8) + #define GSWIP_MII_CFG_RMII_CLK BIT(7) #define GSWIP_MII_CFG_MODE_MIIP 0x0 #define GSWIP_MII_CFG_MODE_MIIM 0x1 #define GSWIP_MII_CFG_MODE_RMIIP 0x2 #define GSWIP_MII_CFG_MODE_RMIIM 0x3 #define GSWIP_MII_CFG_MODE_RGMII 0x4 +#define GSWIP_MII_CFG_MODE_GMII 0x9 #define GSWIP_MII_CFG_MODE_MASK 0xf #define GSWIP_MII_CFG_RATE_M2P5 0x00 #define GSWIP_MII_CFG_RATE_M25 0x10 @@@ -191,6 -194,23 +195,23 @@@ #define GSWIP_PCE_DEFPVID(p) (0x486 + ((p) * 0xA))
#define GSWIP_MAC_FLEN 0x8C5 + #define GSWIP_MAC_CTRL_0p(p) (0x903 + ((p) * 0xC)) + #define GSWIP_MAC_CTRL_0_PADEN BIT(8) + #define GSWIP_MAC_CTRL_0_FCS_EN BIT(7) + #define GSWIP_MAC_CTRL_0_FCON_MASK 0x0070 + #define GSWIP_MAC_CTRL_0_FCON_AUTO 0x0000 + #define GSWIP_MAC_CTRL_0_FCON_RX 0x0010 + #define GSWIP_MAC_CTRL_0_FCON_TX 0x0020 + #define GSWIP_MAC_CTRL_0_FCON_RXTX 0x0030 + #define GSWIP_MAC_CTRL_0_FCON_NONE 0x0040 + #define GSWIP_MAC_CTRL_0_FDUP_MASK 0x000C + #define GSWIP_MAC_CTRL_0_FDUP_AUTO 0x0000 + #define GSWIP_MAC_CTRL_0_FDUP_EN 0x0004 + #define GSWIP_MAC_CTRL_0_FDUP_DIS 0x000C + #define GSWIP_MAC_CTRL_0_GMII_MASK 0x0003 + #define GSWIP_MAC_CTRL_0_GMII_AUTO 0x0000 + #define GSWIP_MAC_CTRL_0_GMII_MII 0x0001 + #define GSWIP_MAC_CTRL_0_GMII_RGMII 0x0002 #define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC)) #define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Lnegth */
@@@ -221,7 -241,6 +242,7 @@@ struct gswip_hw_info { int max_ports; int cpu_port; + const struct dsa_switch_ops *ops; };
struct xway_gphy_match_data { @@@ -655,16 -674,13 +676,13 @@@ static int gswip_port_enable(struct dsa GSWIP_SDMA_PCTRLp(port));
if (!dsa_is_cpu_port(ds, port)) { - u32 macconf = GSWIP_MDIO_PHY_LINK_AUTO | - GSWIP_MDIO_PHY_SPEED_AUTO | - GSWIP_MDIO_PHY_FDUP_AUTO | - GSWIP_MDIO_PHY_FCONTX_AUTO | - GSWIP_MDIO_PHY_FCONRX_AUTO | - (phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK); - - gswip_mdio_w(priv, macconf, GSWIP_MDIO_PHYp(port)); - /* Activate MDIO auto polling */ - gswip_mdio_mask(priv, 0, BIT(port), GSWIP_MDIO_MDC_CFG0); + u32 mdio_phy = 0; + + if (phydev) + mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK; + + gswip_mdio_mask(priv, GSWIP_MDIO_PHY_ADDR_MASK, mdio_phy, + GSWIP_MDIO_PHYp(port)); }
return 0; @@@ -677,14 -693,6 +695,6 @@@ static void gswip_port_disable(struct d if (!dsa_is_user_port(ds, port)) return;
- if (!dsa_is_cpu_port(ds, port)) { - gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_DOWN, - GSWIP_MDIO_PHY_LINK_MASK, - GSWIP_MDIO_PHYp(port)); - /* Deactivate MDIO auto polling */ - gswip_mdio_mask(priv, BIT(port), 0, GSWIP_MDIO_MDC_CFG0); - } - gswip_switch_mask(priv, GSWIP_FDMA_PCTRL_EN, 0, GSWIP_FDMA_PCTRLp(port)); gswip_switch_mask(priv, GSWIP_SDMA_PCTRL_EN, 0, @@@ -796,14 -804,32 +806,32 @@@ static int gswip_setup(struct dsa_switc gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP2); gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP3);
- /* disable PHY auto polling */ + /* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an + * interoperability problem with this auto polling mechanism because + * their status registers think that the link is in a different state + * than it actually is. For the AR8030 it has the BMSR_ESTATEN bit set + * as well as ESTATUS_1000_TFULL and ESTATUS_1000_XFULL. This makes the + * auto polling state machine consider the link being negotiated with + * 1Gbit/s. Since the PHY itself is a Fast Ethernet RMII PHY this leads + * to the switch port being completely dead (RX and TX are both not + * working). + * Also with various other PHY / port combinations (PHY11G GPHY, PHY22F + * GPHY, external RGMII PEF7071/7072) any traffic would stop. Sometimes + * it would work fine for a few minutes to hours and then stop, on + * other device it would no traffic could be sent or received at all. + * Testing shows that when PHY auto polling is disabled these problems + * go away. + */ gswip_mdio_w(priv, 0x0, GSWIP_MDIO_MDC_CFG0); + /* Configure the MDIO Clock 2.5 MHz */ gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
- /* Disable the xMII link */ + /* Disable the xMII interface and clear it's isolation bit */ for (i = 0; i < priv->hw_info->max_ports; i++) - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i); + gswip_mii_mask_cfg(priv, + GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE, + 0, i);
/* enable special tag insertion on cpu port */ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, @@@ -1386,42 -1412,12 +1414,42 @@@ static int gswip_port_fdb_dump(struct d return 0; }
-static void gswip_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void gswip_phylink_set_capab(unsigned long *supported, + struct phylink_link_state *state) { __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+ /* Allow all the expected bits */ + phylink_set(mask, Autoneg); + phylink_set_port_modes(mask); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + + /* With the exclusion of MII, Reverse MII and Reduced MII, we + * support Gigabit, including Half duplex + */ + if (state->interface != PHY_INTERFACE_MODE_MII && + state->interface != PHY_INTERFACE_MODE_REVMII && + state->interface != PHY_INTERFACE_MODE_RMII) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseT_Half); + } + + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void gswip_xrx200_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ switch (port) { case 0: case 1: @@@ -1448,56 -1444,146 +1476,162 @@@ return; }
- /* Allow all the expected bits */ - phylink_set(mask, Autoneg); - phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); + gswip_phylink_set_capab(supported, state);
- /* With the exclusion of MII, Reverse MII and Reduced MII, we - * support Gigabit, including Half duplex - */ - if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII && - state->interface != PHY_INTERFACE_MODE_RMII) { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseT_Half); + return; + +unsupported: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported interface '%s' for port %d\n", + phy_modes(state->interface), port); +} + +static void gswip_xrx300_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + switch (port) { + case 0: + if (!phy_interface_mode_is_rgmii(state->interface) && + state->interface != PHY_INTERFACE_MODE_GMII && + state->interface != PHY_INTERFACE_MODE_RMII) + goto unsupported; + break; + case 1: + case 2: + case 3: + case 4: + if (state->interface != PHY_INTERFACE_MODE_INTERNAL) + goto unsupported; + break; + case 5: + if (!phy_interface_mode_is_rgmii(state->interface) && + state->interface != PHY_INTERFACE_MODE_INTERNAL && + state->interface != PHY_INTERFACE_MODE_RMII) + goto unsupported; + break; + default: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported port: %i\n", port); + return; }
- phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); + gswip_phylink_set_capab(supported, state);
- bitmap_and(supported, supported, mask, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_and(state->advertising, state->advertising, mask, - __ETHTOOL_LINK_MODE_MASK_NBITS); return;
unsupported: bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); dev_err(ds->dev, "Unsupported interface '%s' for port %d\n", phy_modes(state->interface), port); - return; }
+ static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link) + { + u32 mdio_phy; + + if (link) + mdio_phy = GSWIP_MDIO_PHY_LINK_UP; + else + mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN; + + gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_MASK, mdio_phy, + GSWIP_MDIO_PHYp(port)); + } + + static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, + phy_interface_t interface) + { + u32 mdio_phy = 0, mii_cfg = 0, mac_ctrl_0 = 0; + + switch (speed) { + case SPEED_10: + mdio_phy = GSWIP_MDIO_PHY_SPEED_M10; + + if (interface == PHY_INTERFACE_MODE_RMII) + mii_cfg = GSWIP_MII_CFG_RATE_M50; + else + mii_cfg = GSWIP_MII_CFG_RATE_M2P5; + + mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; + break; + + case SPEED_100: + mdio_phy = GSWIP_MDIO_PHY_SPEED_M100; + + if (interface == PHY_INTERFACE_MODE_RMII) + mii_cfg = GSWIP_MII_CFG_RATE_M50; + else + mii_cfg = GSWIP_MII_CFG_RATE_M25; + + mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; + break; + + case SPEED_1000: + mdio_phy = GSWIP_MDIO_PHY_SPEED_G1; + + mii_cfg = GSWIP_MII_CFG_RATE_M125; + + mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_RGMII; + break; + } + + gswip_mdio_mask(priv, GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy, + GSWIP_MDIO_PHYp(port)); + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port); + gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0, + GSWIP_MAC_CTRL_0p(port)); + } + + static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) + { + u32 mac_ctrl_0, mdio_phy; + + if (duplex == DUPLEX_FULL) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_EN; + mdio_phy = GSWIP_MDIO_PHY_FDUP_EN; + } else { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_DIS; + mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS; + } + + gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0, + GSWIP_MAC_CTRL_0p(port)); + gswip_mdio_mask(priv, GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy, + GSWIP_MDIO_PHYp(port)); + } + + static void gswip_port_set_pause(struct gswip_priv *priv, int port, + bool tx_pause, bool rx_pause) + { + u32 mac_ctrl_0, mdio_phy; + + if (tx_pause && rx_pause) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RXTX; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | + GSWIP_MDIO_PHY_FCONRX_EN; + } else if (tx_pause) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_TX; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | + GSWIP_MDIO_PHY_FCONRX_DIS; + } else if (rx_pause) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RX; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | + GSWIP_MDIO_PHY_FCONRX_EN; + } else { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_NONE; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | + GSWIP_MDIO_PHY_FCONRX_DIS; + } + + gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FCON_MASK, + mac_ctrl_0, GSWIP_MAC_CTRL_0p(port)); + gswip_mdio_mask(priv, + GSWIP_MDIO_PHY_FCONTX_MASK | + GSWIP_MDIO_PHY_FCONRX_MASK, + mdio_phy, GSWIP_MDIO_PHYp(port)); + } + static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) @@@ -1517,6 -1603,9 +1651,9 @@@ break; case PHY_INTERFACE_MODE_RMII: miicfg |= GSWIP_MII_CFG_MODE_RMIIM; + + /* Configure the RMII clock as output: */ + miicfg |= GSWIP_MII_CFG_RMII_CLK; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: @@@ -1524,15 -1613,16 +1661,19 @@@ case PHY_INTERFACE_MODE_RGMII_TXID: miicfg |= GSWIP_MII_CFG_MODE_RGMII; break; + case PHY_INTERFACE_MODE_GMII: + miicfg |= GSWIP_MII_CFG_MODE_GMII; + break; default: dev_err(ds->dev, "Unsupported interface: %d\n", state->interface); return; } - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_MODE_MASK, miicfg, port); + + gswip_mii_mask_cfg(priv, + GSWIP_MII_CFG_MODE_MASK | GSWIP_MII_CFG_RMII_CLK | + GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS, + miicfg, port);
switch (state->interface) { case PHY_INTERFACE_MODE_RGMII_ID: @@@ -1557,6 -1647,9 +1698,9 @@@ static void gswip_phylink_mac_link_down struct gswip_priv *priv = ds->priv;
gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, port); + + if (!dsa_is_cpu_port(ds, port)) + gswip_port_set_link(priv, port, false); }
static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, @@@ -1568,6 -1661,13 +1712,13 @@@ { struct gswip_priv *priv = ds->priv;
+ if (!dsa_is_cpu_port(ds, port)) { + gswip_port_set_link(priv, port, true); + gswip_port_set_speed(priv, port, speed, interface); + gswip_port_set_duplex(priv, port, duplex); + gswip_port_set_pause(priv, port, tx_pause, rx_pause); + } + gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); }
@@@ -1639,7 -1739,7 +1790,7 @@@ static int gswip_get_sset_count(struct return ARRAY_SIZE(gswip_rmon_cnt); }
-static const struct dsa_switch_ops gswip_switch_ops = { +static const struct dsa_switch_ops gswip_xrx200_switch_ops = { .get_tag_protocol = gswip_get_tag_protocol, .setup = gswip_setup, .port_enable = gswip_port_enable, @@@ -1654,31 -1754,7 +1805,31 @@@ .port_fdb_add = gswip_port_fdb_add, .port_fdb_del = gswip_port_fdb_del, .port_fdb_dump = gswip_port_fdb_dump, - .phylink_validate = gswip_phylink_validate, + .phylink_validate = gswip_xrx200_phylink_validate, + .phylink_mac_config = gswip_phylink_mac_config, + .phylink_mac_link_down = gswip_phylink_mac_link_down, + .phylink_mac_link_up = gswip_phylink_mac_link_up, + .get_strings = gswip_get_strings, + .get_ethtool_stats = gswip_get_ethtool_stats, + .get_sset_count = gswip_get_sset_count, +}; + +static const struct dsa_switch_ops gswip_xrx300_switch_ops = { + .get_tag_protocol = gswip_get_tag_protocol, + .setup = gswip_setup, + .port_enable = gswip_port_enable, + .port_disable = gswip_port_disable, + .port_bridge_join = gswip_port_bridge_join, + .port_bridge_leave = gswip_port_bridge_leave, + .port_fast_age = gswip_port_fast_age, + .port_vlan_filtering = gswip_port_vlan_filtering, + .port_vlan_add = gswip_port_vlan_add, + .port_vlan_del = gswip_port_vlan_del, + .port_stp_state_set = gswip_port_stp_state_set, + .port_fdb_add = gswip_port_fdb_add, + .port_fdb_del = gswip_port_fdb_del, + .port_fdb_dump = gswip_port_fdb_dump, + .phylink_validate = gswip_xrx300_phylink_validate, .phylink_mac_config = gswip_phylink_mac_config, .phylink_mac_link_down = gswip_phylink_mac_link_down, .phylink_mac_link_up = gswip_phylink_mac_link_up, @@@ -1907,7 -1983,7 +2058,7 @@@ remove_gphy static int gswip_probe(struct platform_device *pdev) { struct gswip_priv *priv; - struct device_node *mdio_np, *gphy_fw_np; + struct device_node *np, *mdio_np, *gphy_fw_np; struct device *dev = &pdev->dev; int err; int i; @@@ -1940,28 -2016,10 +2091,28 @@@ priv->ds->dev = dev; priv->ds->num_ports = priv->hw_info->max_ports; priv->ds->priv = priv; - priv->ds->ops = &gswip_switch_ops; + priv->ds->ops = priv->hw_info->ops; priv->dev = dev; version = gswip_switch_r(priv, GSWIP_VERSION);
+ np = dev->of_node; + switch (version) { + case GSWIP_VERSION_2_0: + case GSWIP_VERSION_2_1: + if (!of_device_is_compatible(np, "lantiq,xrx200-gswip")) + return -EINVAL; + break; + case GSWIP_VERSION_2_2: + case GSWIP_VERSION_2_2_ETC: + if (!of_device_is_compatible(np, "lantiq,xrx300-gswip") && + !of_device_is_compatible(np, "lantiq,xrx330-gswip")) + return -EINVAL; + break; + default: + dev_err(dev, "unknown GSWIP version: 0x%x", version); + return -ENOENT; + } + /* bring up the mdio bus */ gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw"); if (gphy_fw_np) { @@@ -2039,19 -2097,10 +2190,19 @@@ static int gswip_remove(struct platform static const struct gswip_hw_info gswip_xrx200 = { .max_ports = 7, .cpu_port = 6, + .ops = &gswip_xrx200_switch_ops, +}; + +static const struct gswip_hw_info gswip_xrx300 = { + .max_ports = 7, + .cpu_port = 6, + .ops = &gswip_xrx300_switch_ops, };
static const struct of_device_id gswip_of_match[] = { { .compatible = "lantiq,xrx200-gswip", .data = &gswip_xrx200 }, + { .compatible = "lantiq,xrx300-gswip", .data = &gswip_xrx300 }, + { .compatible = "lantiq,xrx330-gswip", .data = &gswip_xrx300 }, {}, }; MODULE_DEVICE_TABLE(of, gswip_of_match); diff --combined drivers/net/ethernet/broadcom/bcm4908_enet.c index cbfed1d1477b,65981931a798..b7afac5c7ca7 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@@ -9,7 -9,6 +9,7 @@@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_net.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/string.h> @@@ -54,7 -53,6 +54,7 @@@ struct bcm4908_enet_dma_ring int length; u16 cfg_block; u16 st_ram_block; + struct napi_struct napi;
union { void *cpu_addr; @@@ -68,8 -66,8 +68,8 @@@ struct bcm4908_enet { struct device *dev; struct net_device *netdev; - struct napi_struct napi; void __iomem *base; + int irq_tx;
struct bcm4908_enet_dma_ring tx_ring; struct bcm4908_enet_dma_ring rx_ring; @@@ -124,31 -122,24 +124,31 @@@ static void enet_umac_set(struct bcm490 * Helpers */
-static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet) +static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); + enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD); }
-static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet) +/*** + * DMA ring ops + */ + +static void bcm4908_enet_dma_ring_intrs_on(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); }
-static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet) +static void bcm4908_enet_dma_ring_intrs_off(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0); }
-static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu) +static void bcm4908_enet_dma_ring_intrs_ack(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); }
/*** @@@ -181,6 -172,7 +181,7 @@@ static int bcm4908_dma_alloc_buf_descs(
err_free_buf_descs: dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr); + ring->cpu_addr = NULL; return -ENOMEM; }
@@@ -422,14 -414,11 +423,14 @@@ static void bcm4908_enet_gmac_init(stru static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id) { struct bcm4908_enet *enet = dev_id; + struct bcm4908_enet_dma_ring *ring;
- bcm4908_enet_intrs_off(enet); - bcm4908_enet_intrs_ack(enet); + ring = (irq == enet->irq_tx) ? &enet->tx_ring : &enet->rx_ring;
- napi_schedule(&enet->napi); + bcm4908_enet_dma_ring_intrs_off(enet, ring); + bcm4908_enet_dma_ring_intrs_ack(enet, ring); + + napi_schedule(&ring->napi);
return IRQ_HANDLED; } @@@ -437,8 -426,6 +438,8 @@@ static int bcm4908_enet_open(struct net_device *netdev) { struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; struct device *dev = enet->dev; int err;
@@@ -448,17 -435,6 +449,17 @@@ return err; }
+ if (enet->irq_tx > 0) { + err = request_irq(enet->irq_tx, bcm4908_enet_irq_handler, 0, + "tx", enet); + if (err) { + dev_err(dev, "Failed to request IRQ %d: %d\n", + enet->irq_tx, err); + free_irq(netdev->irq, enet); + return err; + } + } + bcm4908_enet_gmac_init(enet); bcm4908_enet_dma_reset(enet); bcm4908_enet_dma_init(enet); @@@ -467,19 -443,14 +468,19 @@@
enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN); enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0); - bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
- napi_enable(&enet->napi); + if (enet->irq_tx > 0) { + napi_enable(&tx_ring->napi); + bcm4908_enet_dma_ring_intrs_ack(enet, tx_ring); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); + } + + bcm4908_enet_dma_rx_ring_enable(enet, rx_ring); + napi_enable(&rx_ring->napi); netif_carrier_on(netdev); netif_start_queue(netdev); - - bcm4908_enet_intrs_ack(enet); - bcm4908_enet_intrs_on(enet); + bcm4908_enet_dma_ring_intrs_ack(enet, rx_ring); + bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
return 0; } @@@ -487,20 -458,16 +488,20 @@@ static int bcm4908_enet_stop(struct net_device *netdev) { struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
netif_stop_queue(netdev); netif_carrier_off(netdev); - napi_disable(&enet->napi); + napi_disable(&rx_ring->napi); + napi_disable(&tx_ring->napi);
bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring); bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring);
bcm4908_enet_dma_uninit(enet);
+ free_irq(enet->irq_tx, enet); free_irq(enet->netdev->irq, enet);
return 0; @@@ -517,19 -484,25 +518,19 @@@ static int bcm4908_enet_start_xmit(stru u32 tmp;
/* Free transmitted skbs */ - while (ring->read_idx != ring->write_idx) { - buf_desc = &ring->buf_desc[ring->read_idx]; - if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) - break; - slot = &ring->slots[ring->read_idx]; - - dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); - dev_kfree_skb(slot->skb); - if (++ring->read_idx == ring->length) - ring->read_idx = 0; - } + if (enet->irq_tx < 0 && + !(le32_to_cpu(ring->buf_desc[ring->read_idx].ctl) & DMA_CTL_STATUS_OWN)) + napi_schedule(&enet->tx_ring.napi);
/* Don't use the last empty buf descriptor */ if (ring->read_idx <= ring->write_idx) free_buf_descs = ring->read_idx - ring->write_idx + ring->length; else free_buf_descs = ring->read_idx - ring->write_idx; - if (free_buf_descs < 2) + if (free_buf_descs < 2) { + netif_stop_queue(netdev); return NETDEV_TX_BUSY; + }
/* Hardware removes OWN bit after sending data */ buf_desc = &ring->buf_desc[ring->write_idx]; @@@ -566,10 -539,9 +567,10 @@@ return NETDEV_TX_OK; }
-static int bcm4908_enet_poll(struct napi_struct *napi, int weight) +static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight) { - struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi); + struct bcm4908_enet_dma_ring *rx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi); + struct bcm4908_enet *enet = container_of(rx_ring, struct bcm4908_enet, rx_ring); struct device *dev = enet->dev; int handled = 0;
@@@ -618,7 -590,7 +619,7 @@@
if (handled < weight) { napi_complete_done(napi, handled); - bcm4908_enet_intrs_on(enet); + bcm4908_enet_dma_ring_intrs_on(enet, rx_ring); }
/* Hardware could disable ring if it run out of descriptors */ @@@ -627,42 -599,6 +628,42 @@@ return handled; }
+static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight) +{ + struct bcm4908_enet_dma_ring *tx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi); + struct bcm4908_enet *enet = container_of(tx_ring, struct bcm4908_enet, tx_ring); + struct bcm4908_enet_dma_ring_bd *buf_desc; + struct bcm4908_enet_dma_ring_slot *slot; + struct device *dev = enet->dev; + unsigned int bytes = 0; + int handled = 0; + + while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) { + buf_desc = &tx_ring->buf_desc[tx_ring->read_idx]; + if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) + break; + slot = &tx_ring->slots[tx_ring->read_idx]; + + dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); + dev_kfree_skb(slot->skb); + bytes += slot->len; + if (++tx_ring->read_idx == tx_ring->length) + tx_ring->read_idx = 0; + + handled++; + } + + if (handled < weight) { + napi_complete_done(napi, handled); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); + } + + if (netif_queue_stopped(enet->netdev)) + netif_wake_queue(enet->netdev); + + return handled; +} + static int bcm4908_enet_change_mtu(struct net_device *netdev, int new_mtu) { struct bcm4908_enet *enet = netdev_priv(netdev); @@@ -685,7 -621,6 +686,7 @@@ static int bcm4908_enet_probe(struct pl struct device *dev = &pdev->dev; struct net_device *netdev; struct bcm4908_enet *enet; + const u8 *mac; int err;
netdev = devm_alloc_etherdev(dev, sizeof(*enet)); @@@ -706,8 -641,6 +707,8 @@@ if (netdev->irq < 0) return netdev->irq;
+ enet->irq_tx = platform_get_irq_byname(pdev, "tx"); + dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
err = bcm4908_enet_dma_alloc(enet); @@@ -715,17 -648,12 +716,17 @@@ return err;
SET_NETDEV_DEV(netdev, &pdev->dev); - eth_hw_addr_random(netdev); + mac = of_get_mac_address(dev->of_node); + if (!IS_ERR(mac)) + ether_addr_copy(netdev->dev_addr, mac); + else + eth_hw_addr_random(netdev); netdev->netdev_ops = &bcm4908_enet_netdev_ops; netdev->min_mtu = ETH_ZLEN; netdev->mtu = ETH_DATA_LEN; netdev->max_mtu = ENET_MTU_MAX; - netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64); + netif_tx_napi_add(netdev, &enet->tx_ring.napi, bcm4908_enet_poll_tx, NAPI_POLL_WEIGHT); + netif_napi_add(netdev, &enet->rx_ring.napi, bcm4908_enet_poll_rx, NAPI_POLL_WEIGHT);
err = register_netdev(netdev); if (err) { @@@ -743,8 -671,7 +744,8 @@@ static int bcm4908_enet_remove(struct p struct bcm4908_enet *enet = platform_get_drvdata(pdev);
unregister_netdev(enet->netdev); - netif_napi_del(&enet->napi); + netif_napi_del(&enet->rx_ring.napi); + netif_napi_del(&enet->tx_ring.napi); bcm4908_enet_dma_free(enet);
return 0; diff --combined drivers/net/ethernet/cadence/macb_main.c index f56f3dbbc015,6e5cf490c01d..ffd56a23f8b0 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@@ -694,22 -694,6 +694,22 @@@ static void macb_mac_config(struct phyl if (old_ncr ^ ncr) macb_or_gem_writel(bp, NCR, ncr);
+ /* Disable AN for SGMII fixed link configuration, enable otherwise. + * Must be written after PCSSEL is set in NCFGR, + * otherwise writes will not take effect. + */ + if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) { + u32 pcsctrl, old_pcsctrl; + + old_pcsctrl = gem_readl(bp, PCSCNTRL); + if (mode == MLO_AN_FIXED) + pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG); + else + pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG); + if (old_pcsctrl != pcsctrl) + gem_writel(bp, PCSCNTRL, pcsctrl); + } + spin_unlock_irqrestore(&bp->lock, flags); }
@@@ -863,15 -847,6 +863,15 @@@ static int macb_phylink_connect(struct return 0; }
+static void macb_get_pcs_fixed_state(struct phylink_config *config, + struct phylink_link_state *state) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct macb *bp = netdev_priv(ndev); + + state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0; +} + /* based on au1000_eth. c*/ static int macb_mii_probe(struct net_device *dev) { @@@ -880,11 -855,6 +880,11 @@@ bp->phylink_config.dev = &dev->dev; bp->phylink_config.type = PHYLINK_NETDEV;
+ if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { + bp->phylink_config.poll_fixed_state = true; + bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state; + } + bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode, bp->phy_interface, &macb_phylink_ops); if (IS_ERR(bp->phylink)) { @@@ -3269,6 -3239,9 +3269,9 @@@ static void gem_prog_cmp_regs(struct ma bool cmp_b = false; bool cmp_c = false;
+ if (!macb_is_gem(bp)) + return; + tp4sp_v = &(fs->h_u.tcp_ip4_spec); tp4sp_m = &(fs->m_u.tcp_ip4_spec);
@@@ -3637,6 -3610,7 +3640,7 @@@ static void macb_restore_features(struc { struct net_device *netdev = bp->dev; netdev_features_t features = netdev->features; + struct ethtool_rx_fs_item *item;
/* TX checksum offload */ macb_set_txcsum_feature(bp, features); @@@ -3645,6 -3619,9 +3649,9 @@@ macb_set_rxcsum_feature(bp, features);
/* RX Flow Filters */ + list_for_each_entry(item, &bp->rx_fs_list.list, list) + gem_prog_cmp_regs(bp, &item->fs); + macb_set_rxflow_feature(bp, features); }
@@@ -3758,15 -3735,17 +3765,15 @@@ static int macb_clk_init(struct platfor *hclk = devm_clk_get(&pdev->dev, "hclk"); }
- if (IS_ERR_OR_NULL(*pclk)) { - err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); - return err; - } + if (IS_ERR_OR_NULL(*pclk)) + return dev_err_probe(&pdev->dev, + IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV, + "failed to get pclk\n");
- if (IS_ERR_OR_NULL(*hclk)) { - err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); - return err; - } + if (IS_ERR_OR_NULL(*hclk)) + return dev_err_probe(&pdev->dev, + IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV, + "failed to get hclk\n");
*tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk"); if (IS_ERR(*tx_clk)) diff --combined drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index d2ba40c19696,23a2ebdfd503..a7f291c89702 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@@ -1794,11 -1794,25 +1794,25 @@@ int cudbg_collect_sge_indirect(struct c struct cudbg_buffer temp_buff = { 0 }; struct sge_qbase_reg_field *sge_qbase; struct ireg_buf *ch_sge_dbg; + u8 padap_running = 0; int i, rc; + u32 size;
- rc = cudbg_get_buff(pdbg_init, dbg_buff, - sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase), - &temp_buff); + /* Accessing SGE_QBASE_MAP[0-3] and SGE_QBASE_INDEX regs can + * lead to SGE missing doorbells under heavy traffic. So, only + * collect them when adapter is idle. + */ + for_each_port(padap, i) { + padap_running = netif_running(padap->port[i]); + if (padap_running) + break; + } + + size = sizeof(*ch_sge_dbg) * 2; + if (!padap_running) + size += sizeof(*sge_qbase); + + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc;
@@@ -1820,7 -1834,8 +1834,8 @@@ ch_sge_dbg++; }
- if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5 && + !padap_running) { sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg; /* 1 addr reg SGE_QBASE_INDEX and 4 data reg * SGE_QBASE_MAP[0-3] @@@ -3536,7 -3551,8 +3551,7 @@@ out }
out_free: - if (data) - kvfree(data); + kvfree(data);
#undef QDESC_GET_FLQ #undef QDESC_GET_RXQ diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c446b63be503,b0dbe6dcaa7b..1c17fdc780e9 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@@ -62,7 -62,7 +62,7 @@@ static void hclge_sync_vlan_filter(stru static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle); static void hclge_rfs_filter_expire(struct hclge_dev *hdev); -static void hclge_clear_arfs_rules(struct hnae3_handle *handle); +static int hclge_clear_arfs_rules(struct hclge_dev *hdev); static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr); static int hclge_set_default_loopback(struct hclge_dev *hdev); @@@ -70,7 -70,6 +70,7 @@@ static void hclge_sync_mac_table(struct hclge_dev *hdev); static void hclge_restore_hw_table(struct hclge_dev *hdev); static void hclge_sync_promisc_mode(struct hclge_dev *hdev); +static void hclge_sync_fd_table(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@@@ -385,62 -384,36 +385,62 @@@ static const struct key_info meta_data_ };
static const struct key_info tuple_key_info[] = { - { OUTER_DST_MAC, 48}, - { OUTER_SRC_MAC, 48}, - { OUTER_VLAN_TAG_FST, 16}, - { OUTER_VLAN_TAG_SEC, 16}, - { OUTER_ETH_TYPE, 16}, - { OUTER_L2_RSV, 16}, - { OUTER_IP_TOS, 8}, - { OUTER_IP_PROTO, 8}, - { OUTER_SRC_IP, 32}, - { OUTER_DST_IP, 32}, - { OUTER_L3_RSV, 16}, - { OUTER_SRC_PORT, 16}, - { OUTER_DST_PORT, 16}, - { OUTER_L4_RSV, 32}, - { OUTER_TUN_VNI, 24}, - { OUTER_TUN_FLOW_ID, 8}, - { INNER_DST_MAC, 48}, - { INNER_SRC_MAC, 48}, - { INNER_VLAN_TAG_FST, 16}, - { INNER_VLAN_TAG_SEC, 16}, - { INNER_ETH_TYPE, 16}, - { INNER_L2_RSV, 16}, - { INNER_IP_TOS, 8}, - { INNER_IP_PROTO, 8}, - { INNER_SRC_IP, 32}, - { INNER_DST_IP, 32}, - { INNER_L3_RSV, 16}, - { INNER_SRC_PORT, 16}, - { INNER_DST_PORT, 16}, - { INNER_L4_RSV, 32}, + { OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 }, + { OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 }, + { OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 }, + { INNER_DST_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.dst_mac), + offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) }, + { INNER_SRC_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.src_mac), + offsetof(struct hclge_fd_rule, tuples_mask.src_mac) }, + { INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.vlan_tag1), + offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) }, + { INNER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_ETH_TYPE, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.ether_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) }, + { INNER_L2_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l2_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) }, + { INNER_IP_TOS, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_tos), + offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) }, + { INNER_IP_PROTO, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) }, + { INNER_SRC_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.src_ip), + offsetof(struct hclge_fd_rule, tuples_mask.src_ip) }, + { INNER_DST_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.dst_ip), + offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) }, + { INNER_L3_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l3_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) }, + { INNER_SRC_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.src_port), + offsetof(struct hclge_fd_rule, tuples_mask.src_port) }, + { INNER_DST_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.dst_port), + offsetof(struct hclge_fd_rule, tuples_mask.dst_port) }, + { INNER_L4_RSV, 32, KEY_OPT_LE32, + offsetof(struct hclge_fd_rule, tuples.l4_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) }, };
static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) @@@ -553,6 -526,7 +553,6 @@@ static int hclge_mac_update_stats(struc int ret;
ret = hclge_mac_query_reg_num(hdev, &desc_num); - /* The firmware supports the new statistics acquisition method */ if (!ret) ret = hclge_mac_update_stats_complete(hdev, desc_num); @@@ -777,12 -751,12 +777,12 @@@ static int hclge_get_sset_count(struct handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK; handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
- if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && - hdev->hw.mac.phydev->drv->set_loopback) { + if ((hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && + hdev->hw.mac.phydev->drv->set_loopback) || + hnae3_dev_phy_imp_supported(hdev)) { count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } - } else if (stringset == ETH_SS_STATS) { count = ARRAY_SIZE(g_mac_stats_string) + hclge_tqps_get_sset_count(handle, stringset); @@@ -1176,10 -1150,8 +1176,10 @@@ static void hclge_parse_fiber_link_mode if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac);
+ if (hnae3_dev_pause_supported(hdev)) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported); }
@@@ -1191,11 -1163,8 +1191,11 @@@ static void hclge_parse_backplane_link_ hclge_convert_setting_kr(mac, speed_ability); if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac); + + if (hnae3_dev_pause_supported(hdev)) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported); }
@@@ -1224,13 -1193,10 +1224,13 @@@ static void hclge_parse_copper_link_mod linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported); }
+ if (hnae3_dev_pause_supported(hdev)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); + } + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); }
static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability) @@@ -1290,6 -1256,9 +1290,6 @@@ static void hclge_parse_cfg(struct hclg req = (struct hclge_cfg_param_cmd *)desc[0].data;
/* get the configuration */ - cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]), - HCLGE_CFG_VMDQ_M, - HCLGE_CFG_VMDQ_S); cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S); cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), @@@ -1506,7 -1475,7 +1506,7 @@@ static void hclge_init_kdump_kernel_con "Running kdump kernel. Using minimal resources\n");
/* minimal queue pairs equals to the number of vports */ - hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + hdev->num_tqps = hdev->num_req_vfs + 1; hdev->num_tx_desc = HCLGE_MIN_TX_DESC; hdev->num_rx_desc = HCLGE_MIN_RX_DESC; } @@@ -1521,6 -1490,7 +1521,6 @@@ static int hclge_configure(struct hclge if (ret) return ret;
- hdev->num_vmdq_vport = cfg.vmdq_vport_num; hdev->base_tqp_pid = 0; hdev->vf_rss_size_max = cfg.vf_rss_size_max; hdev->pf_rss_size_max = cfg.pf_rss_size_max; @@@ -1771,7 -1741,7 +1771,7 @@@ static int hclge_map_tqp(struct hclge_d struct hclge_vport *vport = hdev->vport; u16 i, num_vport;
- num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1; for (i = 0; i < num_vport; i++) { int ret;
@@@ -1813,7 -1783,7 +1813,7 @@@ static int hclge_alloc_vport(struct hcl int ret;
/* We need to alloc a vport for main NIC of PF */ - num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1;
if (hdev->num_tqps < num_vport) { dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)", @@@ -2189,6 -2159,7 +2189,6 @@@ static int hclge_only_alloc_priv_buff(s COMPENSATE_HALF_MPS_NUM * half_mps; min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT); rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT); - if (rx_priv < min_rx_priv) return false;
@@@ -2217,7 -2188,7 +2217,7 @@@ /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev * @buf_alloc: pointer to buffer calculation data - * @return: 0: calculate sucessful, negative: fail + * @return: 0: calculate successful, negative: fail */ static int hclge_rx_buffer_calc(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) @@@ -2882,12 -2853,13 +2882,12 @@@ static int hclge_get_mac_phy_link(struc
static void hclge_update_link_status(struct hclge_dev *hdev) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; - struct hnae3_handle *rhandle; - struct hnae3_handle *handle; int state; int ret; - int i;
if (!client) return; @@@ -2902,23 -2874,25 +2902,23 @@@ }
if (state != hdev->hw.mac.link) { - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - handle = &hdev->vport[i].nic; - client->ops->link_status_change(handle, state); - hclge_config_mac_tnl_int(hdev, state); - rhandle = &hdev->vport[i].roce; - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, - state); - } + client->ops->link_status_change(handle, state); + hclge_config_mac_tnl_int(hdev, state); + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, state); + hdev->hw.mac.link = state; }
clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); }
-static void hclge_update_port_capability(struct hclge_mac *mac) +static void hclge_update_port_capability(struct hclge_dev *hdev, + struct hclge_mac *mac) { - /* update fec ability by speed */ - hclge_convert_setting_fec(mac); + if (hnae3_dev_fec_supported(hdev)) + /* update fec ability by speed */ + hclge_convert_setting_fec(mac);
/* firmware can not identify back plane type, the media type * read from configuration can help deal it @@@ -3010,141 -2984,6 +3010,141 @@@ static int hclge_get_sfp_info(struct hc return 0; }
+static int hclge_get_phy_link_ksettings(struct hnae3_handle *handle, + struct ethtool_link_ksettings *cmd) +{ + struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM]; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_phy_link_ksetting_0_cmd *req0; + struct hclge_phy_link_ksetting_1_cmd *req1; + u32 supported, advertising, lp_advertising; + struct hclge_dev *hdev = vport->back; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING, + true); + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get phy link ksetting, ret = %d.\n", ret); + return ret; + } + + req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data; + cmd->base.autoneg = req0->autoneg; + cmd->base.speed = le32_to_cpu(req0->speed); + cmd->base.duplex = req0->duplex; + cmd->base.port = req0->port; + cmd->base.transceiver = req0->transceiver; + cmd->base.phy_address = req0->phy_address; + cmd->base.eth_tp_mdix = req0->eth_tp_mdix; + cmd->base.eth_tp_mdix_ctrl = req0->eth_tp_mdix_ctrl; + supported = le32_to_cpu(req0->supported); + advertising = le32_to_cpu(req0->advertising); + lp_advertising = le32_to_cpu(req0->lp_advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); + + req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data; + cmd->base.master_slave_cfg = req1->master_slave_cfg; + cmd->base.master_slave_state = req1->master_slave_state; + + return 0; +} + +static int +hclge_set_phy_link_ksettings(struct hnae3_handle *handle, + const struct ethtool_link_ksettings *cmd) +{ + struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM]; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_phy_link_ksetting_0_cmd *req0; + struct hclge_phy_link_ksetting_1_cmd *req1; + struct hclge_dev *hdev = vport->back; + u32 advertising; + int ret; + + if (cmd->base.autoneg == AUTONEG_DISABLE && + ((cmd->base.speed != SPEED_100 && cmd->base.speed != SPEED_10) || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) + return -EINVAL; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING, + false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING, + false); + + req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data; + req0->autoneg = cmd->base.autoneg; + req0->speed = cpu_to_le32(cmd->base.speed); + req0->duplex = cmd->base.duplex; + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + req0->advertising = cpu_to_le32(advertising); + req0->eth_tp_mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data; + req1->master_slave_cfg = cmd->base.master_slave_cfg; + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to set phy link ksettings, ret = %d.\n", ret); + return ret; + } + + hdev->hw.mac.autoneg = cmd->base.autoneg; + hdev->hw.mac.speed = cmd->base.speed; + hdev->hw.mac.duplex = cmd->base.duplex; + linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising); + + return 0; +} + +static int hclge_update_tp_port_info(struct hclge_dev *hdev) +{ + struct ethtool_link_ksettings cmd; + int ret; + + if (!hnae3_dev_phy_imp_supported(hdev)) + return 0; + + ret = hclge_get_phy_link_ksettings(&hdev->vport->nic, &cmd); + if (ret) + return ret; + + hdev->hw.mac.autoneg = cmd.base.autoneg; + hdev->hw.mac.speed = cmd.base.speed; + hdev->hw.mac.duplex = cmd.base.duplex; + + return 0; +} + +static int hclge_tp_port_init(struct hclge_dev *hdev) +{ + struct ethtool_link_ksettings cmd; + + if (!hnae3_dev_phy_imp_supported(hdev)) + return 0; + + cmd.base.autoneg = hdev->hw.mac.autoneg; + cmd.base.speed = hdev->hw.mac.speed; + cmd.base.duplex = hdev->hw.mac.duplex; + linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising); + + return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd); +} + static int hclge_update_port_info(struct hclge_dev *hdev) { struct hclge_mac *mac = &hdev->hw.mac; @@@ -3153,7 -2992,7 +3153,7 @@@
/* get the port info from SFP cmd if not copper port */ if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER) - return 0; + return hclge_update_tp_port_info(hdev);
/* if IMP does not support get SFP/qSFP info, return directly */ if (!hdev->support_sfp_query) @@@ -3173,7 -3012,7 +3173,7 @@@
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { if (mac->speed_type == QUERY_ACTIVE_SPEED) { - hclge_update_port_capability(mac); + hclge_update_port_capability(hdev, mac); return 0; } return hclge_cfg_mac_speed_dup(hdev, mac->speed, @@@ -3358,7 -3197,7 +3358,7 @@@ static irqreturn_t hclge_misc_irq_handl * caused this event. Therefore, we will do below for now: * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we * have defered type of reset to be used. - * 2. Schedule the reset serivce task. + * 2. Schedule the reset service task. * 3. When service task receives HNAE3_UNKNOWN_RESET type it * will fetch the correct type of reset. This would be done * by first decoding the types of errors. @@@ -3486,9 -3325,8 +3486,9 @@@ static void hclge_misc_irq_uninit(struc int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *client = hdev->nic_client; - u16 i; + int ret;
if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client) return 0; @@@ -3496,20 -3334,27 +3496,20 @@@ if (!client->ops->reset_notify) return -EOPNOTSUPP;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].nic; - int ret; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify nic client failed %d(%d)\n", type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n", + type, ret);
- return 0; + return ret; }
static int hclge_notify_roce_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].roce; struct hnae3_client *client = hdev->roce_client; int ret; - u16 i;
if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) return 0; @@@ -3517,10 -3362,17 +3517,10 @@@ if (!client->ops->reset_notify) return -EOPNOTSUPP;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].roce; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify roce client failed %d(%d)", - type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)", + type, ret);
return ret; } @@@ -3588,7 -3440,7 +3588,7 @@@ static int hclge_set_all_vf_rst(struct { int i;
- for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) { + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; int ret;
@@@ -3669,12 -3521,14 +3669,12 @@@ void hclge_report_hw_error(struct hclge enum hnae3_hw_error_type type) { struct hnae3_client *client = hdev->nic_client; - u16 i;
if (!client || !client->ops->process_hw_error || !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state)) return;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) - client->ops->process_hw_error(&hdev->vport[i].nic, type); + client->ops->process_hw_error(&hdev->vport[0].nic, type); }
static void hclge_handle_imp_error(struct hclge_dev *hdev) @@@ -3940,21 -3794,6 +3940,21 @@@ static bool hclge_reset_err_handle(stru return false; }
+static void hclge_update_reset_level(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; + + /* if default_reset_request has a higher level reset request, + * it should be handled as soon as possible. since some errors + * need this kind of reset to fix. + */ + reset_level = hclge_get_reset_level(ae_dev, + &hdev->default_reset_request); + if (reset_level != HNAE3_NONE_RESET) + set_bit(reset_level, &hdev->reset_request); +} + static int hclge_set_rst_done(struct hclge_dev *hdev) { struct hclge_pf_rst_done_cmd *req; @@@ -4042,6 -3881,8 +4042,6 @@@ static int hclge_reset_prepare(struct h
static int hclge_reset_rebuild(struct hclge_dev *hdev) { - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - enum hnae3_reset_type reset_level; int ret;
hdev->rst_stats.hw_reset_done_cnt++; @@@ -4085,7 -3926,14 +4085,7 @@@ hdev->rst_stats.reset_done_cnt++; clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
- /* if default_reset_request has a higher level reset request, - * it should be handled as soon as possible. since some errors - * need this kind of reset to fix. - */ - reset_level = hclge_get_reset_level(ae_dev, - &hdev->default_reset_request); - if (reset_level != HNAE3_NONE_RESET) - set_bit(reset_level, &hdev->reset_request); + hclge_update_reset_level(hdev);
return 0; } @@@ -4118,7 -3966,6 +4118,6 @@@ static void hclge_reset_event(struct pc * normalcy is to reset. * 2. A new reset request from the stack due to timeout * - * For the first case,error event might not have ae handle available. * check if this is a new reset request and we are not here just because * last reset attempt did not succeed and watchdog hit us again. We will * know this if last reset request did not occur very recently (watchdog @@@ -4128,14 -3975,14 +4127,14 @@@ * want to make sure we throttle the reset request. Therefore, we will * not allow it again before 3*HZ times. */ - if (!handle) - handle = &hdev->vport[0].nic;
if (time_before(jiffies, (hdev->last_reset_time + HCLGE_RESET_INTERVAL))) { mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); return; - } else if (hdev->default_reset_request) { + } + + if (hdev->default_reset_request) { hdev->reset_level = hclge_get_reset_level(ae_dev, &hdev->default_reset_request); @@@ -4247,7 -4094,6 +4246,7 @@@ static void hclge_periodic_service_task hclge_update_link_status(hdev); hclge_sync_mac_table(hdev); hclge_sync_promisc_mode(hdev); + hclge_sync_fd_table(hdev);
if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { delta = jiffies - hdev->last_serv_processed; @@@ -4892,44 -4738,58 +4891,44 @@@ int hclge_rss_init_hw(struct hclge_dev
void hclge_rss_indir_init_cfg(struct hclge_dev *hdev) { - struct hclge_vport *vport = hdev->vport; - int i, j; + struct hclge_vport *vport = &hdev->vport[0]; + int i;
- for (j = 0; j < hdev->num_vmdq_vport + 1; j++) { - for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) - vport[j].rss_indirection_tbl[i] = - i % vport[j].alloc_rss_size; - } + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) + vport->rss_indirection_tbl[i] = i % vport->alloc_rss_size; }
static int hclge_rss_init_cfg(struct hclge_dev *hdev) { u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size; - int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; - struct hclge_vport *vport = hdev->vport; + int rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; + struct hclge_vport *vport = &hdev->vport[0]; + u16 *rss_ind_tbl;
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - u16 *rss_ind_tbl; - - vport[i].rss_tuple_sets.ipv4_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_sctp_en = - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv4_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_sctp_en = - hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? - HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv6_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - - vport[i].rss_algo = rss_algo; - - rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, - sizeof(*rss_ind_tbl), GFP_KERNEL); - if (!rss_ind_tbl) - return -ENOMEM; + vport->rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + + vport->rss_algo = rss_algo; + + rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, + sizeof(*rss_ind_tbl), GFP_KERNEL); + if (!rss_ind_tbl) + return -ENOMEM;
- vport[i].rss_indirection_tbl = rss_ind_tbl; - memcpy(vport[i].rss_hash_key, hclge_hash_key, - HCLGE_RSS_KEY_SIZE); - } + vport->rss_indirection_tbl = rss_ind_tbl; + memcpy(vport->rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE);
hclge_rss_indir_init_cfg(hdev);
@@@ -5135,285 -4995,6 +5134,285 @@@ static void hclge_request_update_promis set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); }
+static void hclge_sync_fd_state(struct hclge_dev *hdev) +{ + if (hlist_empty(&hdev->fd_rule_list)) + hdev->fd_active_type = HCLGE_FD_RULE_NONE; +} + +static void hclge_fd_inc_rule_cnt(struct hclge_dev *hdev, u16 location) +{ + if (!test_bit(location, hdev->fd_bmap)) { + set_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num++; + } +} + +static void hclge_fd_dec_rule_cnt(struct hclge_dev *hdev, u16 location) +{ + if (test_bit(location, hdev->fd_bmap)) { + clear_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num--; + } +} + +static void hclge_fd_free_node(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + hlist_del(&rule->rule_node); + kfree(rule); + hclge_sync_fd_state(hdev); +} + +static void hclge_update_fd_rule_node(struct hclge_dev *hdev, + struct hclge_fd_rule *old_rule, + struct hclge_fd_rule *new_rule, + enum HCLGE_FD_NODE_STATE state) +{ + switch (state) { + case HCLGE_FD_TO_ADD: + case HCLGE_FD_ACTIVE: + /* 1) if the new state is TO_ADD, just replace the old rule + * with the same location, no matter its state, because the + * new rule will be configured to the hardware. + * 2) if the new state is ACTIVE, it means the new rule + * has been configured to the hardware, so just replace + * the old rule node with the same location. + * 3) for it doesn't add a new node to the list, so it's + * unnecessary to update the rule number and fd_bmap. + */ + new_rule->rule_node.next = old_rule->rule_node.next; + new_rule->rule_node.pprev = old_rule->rule_node.pprev; + memcpy(old_rule, new_rule, sizeof(*old_rule)); + kfree(new_rule); + break; + case HCLGE_FD_DELETED: + hclge_fd_dec_rule_cnt(hdev, old_rule->location); + hclge_fd_free_node(hdev, old_rule); + break; + case HCLGE_FD_TO_DEL: + /* if new request is TO_DEL, and old rule is existent + * 1) the state of old rule is TO_DEL, we need do nothing, + * because we delete rule by location, other rule content + * is unncessary. + * 2) the state of old rule is ACTIVE, we need to change its + * state to TO_DEL, so the rule will be deleted when periodic + * task being scheduled. + * 3) the state of old rule is TO_ADD, it means the rule hasn't + * been added to hardware, so we just delete the rule node from + * fd_rule_list directly. + */ + if (old_rule->state == HCLGE_FD_TO_ADD) { + hclge_fd_dec_rule_cnt(hdev, old_rule->location); + hclge_fd_free_node(hdev, old_rule); + return; + } + old_rule->state = HCLGE_FD_TO_DEL; + break; + } +} + +static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist, + u16 location, + struct hclge_fd_rule **parent) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + if (rule->location == location) + return rule; + else if (rule->location > location) + return NULL; + /* record the parent node, use to keep the nodes in fd_rule_list + * in ascend order. + */ + *parent = rule; + } + + return NULL; +} + +/* insert fd rule node in ascend order according to rule->location */ +static void hclge_fd_insert_rule_node(struct hlist_head *hlist, + struct hclge_fd_rule *rule, + struct hclge_fd_rule *parent) +{ + INIT_HLIST_NODE(&rule->rule_node); + + if (parent) + hlist_add_behind(&rule->rule_node, &parent->rule_node); + else + hlist_add_head(&rule->rule_node, hlist); +} + +static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev, + struct hclge_fd_user_def_cfg *cfg) +{ + struct hclge_fd_user_def_cfg_cmd *req; + struct hclge_desc desc; + u16 data = 0; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false); + + req = (struct hclge_fd_user_def_cfg_cmd *)desc.data; + + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset); + req->ol2_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset); + req->ol3_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset); + req->ol4_cfg = cpu_to_le16(data); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to set fd user def data, ret= %d\n", ret); + return ret; +} + +static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev, bool locked) +{ + int ret; + + if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state)) + return; + + if (!locked) + spin_lock_bh(&hdev->fd_rule_lock); + + ret = hclge_fd_set_user_def_cmd(hdev, hdev->fd_cfg.user_def_cfg); + if (ret) + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + + if (!locked) + spin_unlock_bh(&hdev->fd_rule_lock); +} + +static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + struct hclge_fd_user_def_info *info, *old_info; + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return 0; + + /* for valid layer is start from 1, so need minus 1 to get the cfg */ + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + info = &rule->ep.user_def; + + if (!cfg->ref_cnt || cfg->offset == info->offset) + return 0; + + if (cfg->ref_cnt > 1) + goto error; + + fd_rule = hclge_find_fd_rule(hlist, rule->location, &parent); + if (fd_rule) { + old_info = &fd_rule->ep.user_def; + if (info->layer == old_info->layer) + return 0; + } + +error: + dev_err(&hdev->pdev->dev, + "No available offset for layer%d fd rule, each layer only support one user def offset.\n", + info->layer + 1); + return -ENOSPC; +} + +static void hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + if (!cfg->ref_cnt) { + cfg->offset = rule->ep.user_def.offset; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } + cfg->ref_cnt++; +} + +static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + if (!cfg->ref_cnt) + return; + + cfg->ref_cnt--; + if (!cfg->ref_cnt) { + cfg->offset = 0; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } +} + +static void hclge_update_fd_list(struct hclge_dev *hdev, + enum HCLGE_FD_NODE_STATE state, u16 location, + struct hclge_fd_rule *new_rule) +{ + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + + fd_rule = hclge_find_fd_rule(hlist, location, &parent); + if (fd_rule) { + hclge_fd_dec_user_def_refcnt(hdev, fd_rule); + if (state == HCLGE_FD_ACTIVE) + hclge_fd_inc_user_def_refcnt(hdev, new_rule); + hclge_sync_fd_user_def_cfg(hdev, true); + + hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state); + return; + } + + /* it's unlikely to fail here, because we have checked the rule + * exist before. + */ + if (unlikely(state == HCLGE_FD_TO_DEL || state == HCLGE_FD_DELETED)) { + dev_warn(&hdev->pdev->dev, + "failed to delete fd rule %u, it's inexistent\n", + location); + return; + } + + hclge_fd_inc_user_def_refcnt(hdev, new_rule); + hclge_sync_fd_user_def_cfg(hdev, true); + + hclge_fd_insert_rule_node(hlist, new_rule, parent); + hclge_fd_inc_rule_cnt(hdev, new_rule->location); + + if (state == HCLGE_FD_TO_ADD) { + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); + } +} + static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) { struct hclge_get_fd_mode_cmd *req; @@@ -5492,17 -5073,6 +5491,17 @@@ static int hclge_set_fd_key_config(stru return ret; }
+static void hclge_fd_disable_user_def(struct hclge_dev *hdev) +{ + struct hclge_fd_user_def_cfg *cfg = hdev->fd_cfg.user_def_cfg; + + spin_lock_bh(&hdev->fd_rule_lock); + memset(cfg, 0, sizeof(hdev->fd_cfg.user_def_cfg)); + spin_unlock_bh(&hdev->fd_rule_lock); + + hclge_fd_set_user_def_cmd(hdev, cfg); +} + static int hclge_init_fd_config(struct hclge_dev *hdev) { #define LOW_2_WORDS 0x03 @@@ -5543,12 -5113,9 +5542,12 @@@ BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* If use max 400bit key, we can support tuples for ether type */ - if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) + if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) { key_cfg->tuple_active |= BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC); + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + }
/* roce_type is used to filter roce frames * dst_vport is used to specify the rule @@@ -5657,57 -5224,96 +5656,57 @@@ static int hclge_fd_ad_config(struct hc static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, struct hclge_fd_rule *rule) { + int offset, moffset, ip_offset; + enum HCLGE_FD_KEY_OPT key_opt; u16 tmp_x_s, tmp_y_s; u32 tmp_x_l, tmp_y_l; + u8 *p = (u8 *)rule; int i;
- if (rule->unused_tuple & tuple_bit) + if (rule->unused_tuple & BIT(tuple_bit)) return true;
- switch (tuple_bit) { - case BIT(INNER_DST_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - } + key_opt = tuple_key_info[tuple_bit].key_opt; + offset = tuple_key_info[tuple_bit].offset; + moffset = tuple_key_info[tuple_bit].moffset;
- return true; - case BIT(INNER_SRC_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - } + switch (key_opt) { + case KEY_OPT_U8: + calc_x(*key_x, p[offset], p[moffset]); + calc_y(*key_y, p[offset], p[moffset]);
return true; - case BIT(INNER_VLAN_TAG_FST): - calc_x(tmp_x_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); - calc_y(tmp_y_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); + case KEY_OPT_LE16: + calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); + calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); *(__le16 *)key_x = cpu_to_le16(tmp_x_s); *(__le16 *)key_y = cpu_to_le16(tmp_y_s);
return true; - case BIT(INNER_ETH_TYPE): - calc_x(tmp_x_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - calc_y(tmp_y_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); - - return true; - case BIT(INNER_IP_TOS): - calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - - return true; - case BIT(INNER_IP_PROTO): - calc_x(*key_x, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - calc_y(*key_y, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - - return true; - case BIT(INNER_SRC_IP): - calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - *(__le32 *)key_x = cpu_to_le32(tmp_x_l); - *(__le32 *)key_y = cpu_to_le32(tmp_y_l); - - return true; - case BIT(INNER_DST_IP): - calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); + case KEY_OPT_LE32: + calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true; - case BIT(INNER_SRC_PORT): - calc_x(tmp_x_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - calc_y(tmp_y_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_MAC: + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + }
return true; - case BIT(INNER_DST_PORT): - calc_x(tmp_x_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - calc_y(tmp_y_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_IP: + ip_offset = IPV4_INDEX * sizeof(u32); + calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + *(__le32 *)key_x = cpu_to_le32(tmp_x_l); + *(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true; default: @@@ -5795,12 -5401,12 +5794,12 @@@ static int hclge_config_key(struct hclg
for (i = 0 ; i < MAX_TUPLE; i++) { bool tuple_valid; - u32 check_tuple;
tuple_size = tuple_key_info[i].key_length / 8; - check_tuple = key_cfg->tuple_active & BIT(i); + if (!(key_cfg->tuple_active & BIT(i))) + continue;
- tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x, + tuple_valid = hclge_fd_convert_tuple(i, cur_key_x, cur_key_y, rule); if (tuple_valid) { cur_key_x += tuple_size; @@@ -5931,7 -5537,8 +5930,7 @@@ static int hclge_fd_check_tcpip6_tuple( if (!spec || !unused_tuple) return -EINVAL;
- *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS); + *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
/* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@@ -5946,8 -5553,8 +5945,8 @@@ if (!spec->pdst) *unused_tuple |= BIT(INNER_DST_PORT);
- if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS);
return 0; } @@@ -5959,7 -5566,7 +5958,7 @@@ static int hclge_fd_check_ip6_tuple(str return -EINVAL;
*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); + BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@@ -5971,8 -5578,8 +5970,8 @@@ if (!spec->l4_proto) *unused_tuple |= BIT(INNER_IP_PROTO);
- if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS);
if (spec->l4_4_bytes) return -EOPNOTSUPP; @@@ -6042,98 -5649,9 +6041,98 @@@ static int hclge_fd_check_ext_tuple(str return 0; }
+static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple, + struct hclge_fd_user_def_info *info) +{ + switch (flow_type) { + case ETHER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L2; + *unused_tuple &= ~BIT(INNER_L2_RSV); + break; + case IP_USER_FLOW: + case IPV6_USER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L3; + *unused_tuple &= ~BIT(INNER_L3_RSV); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + info->layer = HCLGE_FD_USER_DEF_L4; + *unused_tuple &= ~BIT(INNER_L4_RSV); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs) +{ + return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0; +} + +static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) +{ + u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active; + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + u16 data, offset, data_mask, offset_mask; + int ret; + + info->layer = HCLGE_FD_USER_DEF_NONE; + *unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + + if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs)) + return 0; + + /* user-def data from ethtool is 64 bit value, the bit0~15 is used + * for data, and bit32~47 is used for offset. + */ + data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + + if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) { + dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); + return -EOPNOTSUPP; + } + + if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) { + dev_err(&hdev->pdev->dev, + "user-def offset[%u] should be no more than %u\n", + offset, HCLGE_FD_MAX_USER_DEF_OFFSET); + return -EINVAL; + } + + if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) { + dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n"); + return -EINVAL; + } + + ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info); + if (ret) { + dev_err(&hdev->pdev->dev, + "unsupported flow type for user-def bytes, ret = %d\n", + ret); + return ret; + } + + info->data = data; + info->data_mask = data_mask; + info->offset = offset; + + return 0; +} + static int hclge_fd_check_spec(struct hclge_dev *hdev, struct ethtool_rx_flow_spec *fs, - u32 *unused_tuple) + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) { u32 flow_type; int ret; @@@ -6146,9 -5664,11 +6145,9 @@@ return -EINVAL; }
- if ((fs->flow_type & FLOW_EXT) && - (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) { - dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); - return -EOPNOTSUPP; - } + ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info); + if (ret) + return ret;
flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); switch (flow_type) { @@@ -6200,194 -5720,217 +6199,194 @@@ return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple); }
-static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location) +static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - struct hclge_fd_rule *rule = NULL; - struct hlist_node *node2; - - spin_lock_bh(&hdev->fd_rule_lock); - hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - } - - spin_unlock_bh(&hdev->fd_rule_lock); + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
- return rule && rule->location == location; -} + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
-/* make sure being called after lock up with fd_rule_lock */ -static int hclge_fd_update_rule_list(struct hclge_dev *hdev, - struct hclge_fd_rule *new_rule, - u16 location, - bool is_add) -{ - struct hclge_fd_rule *rule = NULL, *parent = NULL; - struct hlist_node *node2; + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
- if (is_add && !new_rule) - return -EINVAL; + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
- hlist_for_each_entry_safe(rule, node2, - &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - parent = rule; - } + rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
- if (rule && rule->location == location) { - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF;
- if (!is_add) { - if (!hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_RULE_NONE; - clear_bit(location, hdev->fd_bmap); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; +}
- return 0; - } - } else if (!is_add) { - dev_err(&hdev->pdev->dev, - "delete fail, rule %u is inexistent\n", - location); - return -EINVAL; - } +static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
- INIT_HLIST_NODE(&new_rule->rule_node); + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
- if (parent) - hlist_add_behind(&new_rule->rule_node, &parent->rule_node); - else - hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list); + rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
- set_bit(location, hdev->fd_bmap); - hdev->hclge_fd_rule_num++; - hdev->fd_active_type = new_rule->rule_type; + rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
- return 0; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF; }
-static int hclge_fd_get_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, - struct hclge_fd_rule *rule) +static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); - - switch (flow_type) { - case SCTP_V4_FLOW: - case TCP_V4_FLOW: - case UDP_V4_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src); + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src, + IPV6_SIZE);
- rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst); + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE);
- rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
- rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst); + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
- rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos; + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF;
- rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
- break; - case IP_USER_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; +}
- rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst); +static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src, + IPV6_SIZE);
- rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos; + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst, + IPV6_SIZE);
- rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto; + rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
- rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
- break; - case SCTP_V6_FLOW: - case TCP_V6_FLOW: - case UDP_V6_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE); - - be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF; +}
- rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc); +static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source); + ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source);
- rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst); + ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest); + ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest);
- rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto); + rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto); +}
+static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info, + struct hclge_fd_rule *rule) +{ + switch (info->layer) { + case HCLGE_FD_USER_DEF_L2: + rule->tuples.l2_user_def = info->data; + rule->tuples_mask.l2_user_def = info->data_mask; break; - case IPV6_USER_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE); - - be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - - rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto; - - rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; - + case HCLGE_FD_USER_DEF_L3: + rule->tuples.l3_user_def = info->data; + rule->tuples_mask.l3_user_def = info->data_mask; break; - case ETHER_FLOW: - ether_addr_copy(rule->tuples.src_mac, - fs->h_u.ether_spec.h_source); - ether_addr_copy(rule->tuples_mask.src_mac, - fs->m_u.ether_spec.h_source); - - ether_addr_copy(rule->tuples.dst_mac, - fs->h_u.ether_spec.h_dest); - ether_addr_copy(rule->tuples_mask.dst_mac, - fs->m_u.ether_spec.h_dest); - - rule->tuples.ether_proto = - be16_to_cpu(fs->h_u.ether_spec.h_proto); - rule->tuples_mask.ether_proto = - be16_to_cpu(fs->m_u.ether_spec.h_proto); - + case HCLGE_FD_USER_DEF_L4: + rule->tuples.l4_user_def = (u32)info->data << 16; + rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16; break; default: - return -EOPNOTSUPP; + break; }
+ rule->ep.user_def = *info; +} + +static int hclge_fd_get_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, + struct hclge_fd_user_def_info *info) +{ + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + switch (flow_type) { case SCTP_V4_FLOW: - case SCTP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_SCTP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP); break; case TCP_V4_FLOW: - case TCP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_TCP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP); break; case UDP_V4_FLOW: + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP); + break; + case IP_USER_FLOW: + hclge_fd_get_ip4_tuple(hdev, fs, rule); + break; + case SCTP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP); + break; + case TCP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP); + break; case UDP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_UDP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP); break; - default: + case IPV6_USER_FLOW: + hclge_fd_get_ip6_tuple(hdev, fs, rule); break; + case ETHER_FLOW: + hclge_fd_get_ether_tuple(hdev, fs, rule); + break; + default: + return -EOPNOTSUPP; }
if (fs->flow_type & FLOW_EXT) { rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci); rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci); + hclge_fd_get_user_def_tuple(info, rule); }
if (fs->flow_type & FLOW_MAC_EXT) { @@@ -6398,53 -5941,33 +6397,53 @@@ return 0; }
-/* make sure being called after lock up with fd_rule_lock */ static int hclge_fd_config_rule(struct hclge_dev *hdev, struct hclge_fd_rule *rule) { int ret;
- if (!rule) { + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + return ret; + + return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); +} + +static int hclge_add_fd_entry_common(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + int ret; + + spin_lock_bh(&hdev->fd_rule_lock); + + if (hdev->fd_active_type != rule->rule_type && + (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) { dev_err(&hdev->pdev->dev, - "The flow director rule is NULL\n"); + "mode conflict(new type %d, active type %d), please delete existent rules first\n", + rule->rule_type, hdev->fd_active_type); + spin_unlock_bh(&hdev->fd_rule_lock); return -EINVAL; }
- /* it will never fail here, so needn't to check return value */ - hclge_fd_update_rule_list(hdev, rule, rule->location, true); + ret = hclge_fd_check_user_def_refcnt(hdev, rule); + if (ret) + goto out;
- ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + ret = hclge_clear_arfs_rules(hdev); if (ret) - goto clear_rule; + goto out;
- ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); + ret = hclge_fd_config_rule(hdev, rule); if (ret) - goto clear_rule; + goto out;
- return 0; + rule->state = HCLGE_FD_ACTIVE; + hdev->fd_active_type = rule->rule_type; + hclge_update_fd_list(hdev, rule->state, rule->location, rule);
-clear_rule: - hclge_fd_update_rule_list(hdev, rule, rule->location, false); +out: + spin_unlock_bh(&hdev->fd_rule_lock); return ret; }
@@@ -6456,48 -5979,11 +6455,48 @@@ static bool hclge_is_cls_flower_active( return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE; }
+static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie, + u16 *vport_id, u8 *action, u16 *queue_id) +{ + struct hclge_vport *vport = hdev->vport; + + if (ring_cookie == RX_CLS_FLOW_DISC) { + *action = HCLGE_FD_ACTION_DROP_PACKET; + } else { + u32 ring = ethtool_get_flow_spec_ring(ring_cookie); + u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie); + u16 tqps; + + if (vf > hdev->num_req_vfs) { + dev_err(&hdev->pdev->dev, + "Error: vf id (%u) > max vf num (%u)\n", + vf, hdev->num_req_vfs); + return -EINVAL; + } + + *vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; + tqps = hdev->vport[vf].nic.kinfo.num_tqps; + + if (ring >= tqps) { + dev_err(&hdev->pdev->dev, + "Error: queue id (%u) > max tqp num (%u)\n", + ring, tqps - 1); + return -EINVAL; + } + + *action = HCLGE_FD_ACTION_SELECT_QUEUE; + *queue_id = ring; + } + + return 0; +} + static int hclge_add_fd_entry(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + struct hclge_fd_user_def_info info; u16 dst_vport_id = 0, q_index = 0; struct ethtool_rx_flow_spec *fs; struct hclge_fd_rule *rule; @@@ -6517,22 -6003,51 +6516,22 @@@ return -EOPNOTSUPP; }
- if (hclge_is_cls_flower_active(handle)) { - dev_err(&hdev->pdev->dev, - "please delete all exist cls flower rules first\n"); - return -EINVAL; - } - fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
- ret = hclge_fd_check_spec(hdev, fs, &unused); + ret = hclge_fd_check_spec(hdev, fs, &unused, &info); if (ret) return ret;
- if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = HCLGE_FD_ACTION_DROP_PACKET; - } else { - u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); - u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - u16 tqps; - - if (vf > hdev->num_req_vfs) { - dev_err(&hdev->pdev->dev, - "Error: vf id (%u) > max vf num (%u)\n", - vf, hdev->num_req_vfs); - return -EINVAL; - } - - dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; - tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps; - - if (ring >= tqps) { - dev_err(&hdev->pdev->dev, - "Error: queue id (%u) > max tqp num (%u)\n", - ring, tqps - 1); - return -EINVAL; - } - - action = HCLGE_FD_ACTION_SELECT_QUEUE; - q_index = ring; - } + ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id, + &action, &q_index); + if (ret) + return ret;
rule = kzalloc(sizeof(*rule), GFP_KERNEL); if (!rule) return -ENOMEM;
- ret = hclge_fd_get_tuple(hdev, fs, rule); + ret = hclge_fd_get_tuple(hdev, fs, rule, &info); if (ret) { kfree(rule); return ret; @@@ -6546,9 -6061,15 +6545,9 @@@ rule->action = action; rule->rule_type = HCLGE_FD_EP_ACTIVE;
- /* to avoid rule conflict, when user configure rule by ethtool, - * we need to clear all arfs rules - */ - spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule);
return ret; } @@@ -6569,30 -6090,32 +6568,30 @@@ static int hclge_del_fd_entry(struct hn if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) return -EINVAL;
- if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num || - !hclge_fd_rule_exist(hdev, fs->location)) { + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + !test_bit(fs->location, hdev->fd_bmap)) { dev_err(&hdev->pdev->dev, "Delete fail, rule %u is inexistent\n", fs->location); + spin_unlock_bh(&hdev->fd_rule_lock); return -ENOENT; }
ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location, NULL, false); if (ret) - return ret; + goto out;
- spin_lock_bh(&hdev->fd_rule_lock); - ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false); + hclge_update_fd_list(hdev, HCLGE_FD_DELETED, fs->location, NULL);
+out: spin_unlock_bh(&hdev->fd_rule_lock); - return ret; }
-/* make sure being called after lock up with fd_rule_lock */ -static void hclge_del_all_fd_entries(struct hnae3_handle *handle, - bool clear_list) +static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev, + bool clear_list) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; u16 location; @@@ -6600,8 -6123,6 +6599,8 @@@ if (!hnae3_dev_fd_supported(hdev)) return;
+ spin_lock_bh(&hdev->fd_rule_lock); + for_each_set_bit(location, hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location, @@@ -6618,14 -6139,6 +6617,14 @@@ bitmap_zero(hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); } + + spin_unlock_bh(&hdev->fd_rule_lock); +} + +static void hclge_del_all_fd_entries(struct hclge_dev *hdev) +{ + hclge_clear_fd_rules_in_list(hdev, true); + hclge_fd_disable_user_def(hdev); }
static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@@ -6634,6 -6147,7 +6633,6 @@@ struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; - int ret;
/* Return ok here, because reset error handling will check this * return value. If error is returned here, the reset process will @@@ -6648,11 -6162,25 +6647,11 @@@
spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (!ret) - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - - if (ret) { - dev_warn(&hdev->pdev->dev, - "Restore rule %u failed, remove it\n", - rule->location); - clear_bit(rule->location, hdev->fd_bmap); - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; - } + if (rule->state == HCLGE_FD_ACTIVE) + rule->state = HCLGE_FD_TO_ADD; } - - if (hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_EP_ACTIVE; - spin_unlock_bh(&hdev->fd_rule_lock); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
return 0; } @@@ -6740,10 -6268,6 +6739,10 @@@ static void hclge_fd_get_tcpip6_info(st cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE);
+ spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->psrc = cpu_to_be16(rule->tuples.src_port); spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ? 0 : cpu_to_be16(rule->tuples_mask.src_port); @@@ -6771,10 -6295,6 +6770,10 @@@ static void hclge_fd_get_ip6_info(struc cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE);
+ spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->l4_proto = rule->tuples.ip_proto; spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ? 0 : rule->tuples_mask.ip_proto; @@@ -6802,24 -6322,6 +6801,24 @@@ static void hclge_fd_get_ether_info(str 0 : cpu_to_be16(rule->tuples_mask.ether_proto); }
+static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) == + HCLGE_FD_TUPLE_USER_DEF_TUPLES) { + fs->h_ext.data[0] = 0; + fs->h_ext.data[1] = 0; + fs->m_ext.data[0] = 0; + fs->m_ext.data[1] = 0; + } else { + fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset); + fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data); + fs->m_ext.data[0] = + cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK); + fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask); + } +} + static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { @@@ -6828,8 -6330,6 +6827,8 @@@ fs->m_ext.vlan_tci = rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ? 0 : cpu_to_be16(rule->tuples_mask.vlan_tag1); + + hclge_fd_get_user_def_info(fs, rule); }
if (fs->flow_type & FLOW_MAC_EXT) { @@@ -6941,9 -6441,6 +6940,9 @@@ static int hclge_get_all_rules(struct h return -EMSGSIZE; }
+ if (rule->state == HCLGE_FD_TO_DEL) + continue; + rule_locs[cnt] = rule->location; cnt++; } @@@ -7003,7 -6500,6 +7002,7 @@@ static void hclge_fd_build_arfs_rule(co rule->action = 0; rule->vf_id = 0; rule->rule_type = HCLGE_FD_ARFS_ACTIVE; + rule->state = HCLGE_FD_TO_ADD; if (tuples->ether_proto == ETH_P_IP) { if (tuples->ip_proto == IPPROTO_TCP) rule->flow_type = TCP_V4_FLOW; @@@ -7026,7 -6522,9 +7025,7 @@@ static int hclge_add_fd_entry_by_arfs(s struct hclge_fd_rule_tuples new_tuples = {}; struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; - u16 tmp_queue_id; u16 bit_id; - int ret;
if (!hnae3_dev_fd_supported(hdev)) return -EOPNOTSUPP; @@@ -7062,19 -6560,34 +7061,19 @@@ return -ENOMEM; }
- set_bit(bit_id, hdev->fd_bmap); rule->location = bit_id; rule->arfs.flow_id = flow_id; rule->queue_id = queue_id; hclge_fd_build_arfs_rule(&new_tuples, rule); - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) - return ret; - - return rule->location; + hclge_update_fd_list(hdev, rule->state, rule->location, rule); + hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE; + } else if (rule->queue_id != queue_id) { + rule->queue_id = queue_id; + rule->state = HCLGE_FD_TO_ADD; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); } - spin_unlock_bh(&hdev->fd_rule_lock); - - if (rule->queue_id == queue_id) - return rule->location; - - tmp_queue_id = rule->queue_id; - rule->queue_id = queue_id; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) { - rule->queue_id = tmp_queue_id; - return ret; - } - return rule->location; }
@@@ -7084,6 -6597,7 +7083,6 @@@ static void hclge_rfs_filter_expire(str struct hnae3_handle *handle = &hdev->vport[0].nic; struct hclge_fd_rule *rule; struct hlist_node *node; - HLIST_HEAD(del_list);
spin_lock_bh(&hdev->fd_rule_lock); if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) { @@@ -7091,51 -6605,34 +7090,51 @@@ return; } hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rule->state != HCLGE_FD_ACTIVE) + continue; if (rps_may_expire_flow(handle->netdev, rule->queue_id, rule->arfs.flow_id, rule->location)) { - hlist_del_init(&rule->rule_node); - hlist_add_head(&rule->rule_node, &del_list); - hdev->hclge_fd_rule_num--; - clear_bit(rule->location, hdev->fd_bmap); + rule->state = HCLGE_FD_TO_DEL; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + } + } + spin_unlock_bh(&hdev->fd_rule_lock); +#endif +} + +/* make sure being called after lock up with fd_rule_lock */ +static int hclge_clear_arfs_rules(struct hclge_dev *hdev) +{ +#ifdef CONFIG_RFS_ACCEL + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret; + + if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) + return 0; + + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_DEL: + case HCLGE_FD_ACTIVE: + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) + return ret; + fallthrough; + case HCLGE_FD_TO_ADD: + hclge_fd_dec_rule_cnt(hdev, rule->location); + hlist_del(&rule->rule_node); + kfree(rule); + break; + default: + break; } } - spin_unlock_bh(&hdev->fd_rule_lock); - - hlist_for_each_entry_safe(rule, node, &del_list, rule_node) { - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - rule->location, NULL, false); - kfree(rule); - } -#endif -} - -/* make sure being called after lock up with fd_rule_lock */ -static void hclge_clear_arfs_rules(struct hnae3_handle *handle) -{ -#ifdef CONFIG_RFS_ACCEL - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; + hclge_sync_fd_state(hdev);
- if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE) - hclge_del_all_fd_entries(handle, true); #endif + return 0; }
static void hclge_get_cls_key_basic(const struct flow_rule *flow, @@@ -7317,6 -6814,12 +7316,6 @@@ static int hclge_add_cls_flower(struct struct hclge_fd_rule *rule; int ret;
- if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { - dev_err(&hdev->pdev->dev, - "please remove all exist fd rules via ethtool first\n"); - return -EINVAL; - } - ret = hclge_check_cls_flower(hdev, cls_flower, tc); if (ret) { dev_err(&hdev->pdev->dev, @@@ -7329,10 -6832,8 +7328,10 @@@ return -ENOMEM;
ret = hclge_parse_cls_flower(hdev, cls_flower, rule); - if (ret) - goto err; + if (ret) { + kfree(rule); + return ret; + }
rule->action = HCLGE_FD_ACTION_SELECT_TC; rule->cls_flower.tc = tc; @@@ -7341,10 -6842,22 +7340,10 @@@ rule->cls_flower.cookie = cls_flower->cookie; rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE;
- spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to add cls flower rule, ret = %d\n", ret); - goto err; - } + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule);
- return 0; -err: - kfree(rule); return ret; }
@@@ -7381,66 -6894,25 +7380,66 @@@ static int hclge_del_cls_flower(struct ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location, NULL, false); if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u, ret = %d\n", - rule->location, ret); spin_unlock_bh(&hdev->fd_rule_lock); return ret; }
- ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false); - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u in list, ret = %d\n", - rule->location, ret); - spin_unlock_bh(&hdev->fd_rule_lock); - return ret; + hclge_update_fd_list(hdev, HCLGE_FD_DELETED, rule->location, NULL); + spin_unlock_bh(&hdev->fd_rule_lock); + + return 0; +} + +static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret = 0; + + if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state)) + return; + + spin_lock_bh(&hdev->fd_rule_lock); + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_ADD: + ret = hclge_fd_config_rule(hdev, rule); + if (ret) + goto out; + rule->state = HCLGE_FD_ACTIVE; + break; + case HCLGE_FD_TO_DEL: + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) + goto out; + hclge_fd_dec_rule_cnt(hdev, rule->location); + hclge_fd_free_node(hdev, rule); + break; + default: + break; + } }
+out: + if (ret) + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + spin_unlock_bh(&hdev->fd_rule_lock); +}
- return 0; +static void hclge_sync_fd_table(struct hclge_dev *hdev) +{ + if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) { + bool clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; + + hclge_clear_fd_rules_in_list(hdev, clear_list); + } + + hclge_sync_fd_user_def_cfg(hdev, false); + + hclge_sync_fd_list(hdev, &hdev->fd_rule_list); }
static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle) @@@ -7480,15 -6952,18 +7479,15 @@@ static void hclge_enable_fd(struct hnae { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - bool clear;
hdev->fd_en = enable; - clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
- if (!enable) { - spin_lock_bh(&hdev->fd_rule_lock); - hclge_del_all_fd_entries(handle, clear); - spin_unlock_bh(&hdev->fd_rule_lock); - } else { + if (!enable) + set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state); + else hclge_restore_fd_entries(handle); - } + + hclge_task_schedule(hdev, 0); }
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) @@@ -7649,19 -7124,19 +7648,19 @@@ static int hclge_set_app_loopback(struc return ret; }
-static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en, +static int hclge_cfg_common_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { -#define HCLGE_SERDES_RETRY_MS 10 -#define HCLGE_SERDES_RETRY_NUM 100 +#define HCLGE_COMMON_LB_RETRY_MS 10 +#define HCLGE_COMMON_LB_RETRY_NUM 100
- struct hclge_serdes_lb_cmd *req; + struct hclge_common_lb_cmd *req; struct hclge_desc desc; int ret, i = 0; u8 loop_mode_b;
- req = (struct hclge_serdes_lb_cmd *)desc.data; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false); + req = (struct hclge_common_lb_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, false);
switch (loop_mode) { case HNAE3_LOOP_SERIAL_SERDES: @@@ -7670,12 -7145,9 +7669,12 @@@ case HNAE3_LOOP_PARALLEL_SERDES: loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B; break; + case HNAE3_LOOP_PHY: + loop_mode_b = HCLGE_CMD_GE_PHY_INNER_LOOP_B; + break; default: dev_err(&hdev->pdev->dev, - "unsupported serdes loopback mode %d\n", loop_mode); + "unsupported common loopback mode %d\n", loop_mode); return -ENOTSUPP; }
@@@ -7689,39 -7161,39 +7688,39 @@@ ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "serdes loopback set fail, ret = %d\n", ret); + "common loopback set fail, ret = %d\n", ret); return ret; }
do { - msleep(HCLGE_SERDES_RETRY_MS); - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, + msleep(HCLGE_COMMON_LB_RETRY_MS); + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "serdes loopback get, ret = %d\n", ret); + "common loopback get, ret = %d\n", ret); return ret; } - } while (++i < HCLGE_SERDES_RETRY_NUM && - !(req->result & HCLGE_CMD_SERDES_DONE_B)); + } while (++i < HCLGE_COMMON_LB_RETRY_NUM && + !(req->result & HCLGE_CMD_COMMON_LB_DONE_B));
- if (!(req->result & HCLGE_CMD_SERDES_DONE_B)) { - dev_err(&hdev->pdev->dev, "serdes loopback set timeout\n"); + if (!(req->result & HCLGE_CMD_COMMON_LB_DONE_B)) { + dev_err(&hdev->pdev->dev, "common loopback set timeout\n"); return -EBUSY; - } else if (!(req->result & HCLGE_CMD_SERDES_SUCCESS_B)) { - dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n"); + } else if (!(req->result & HCLGE_CMD_COMMON_LB_SUCCESS_B)) { + dev_err(&hdev->pdev->dev, "common loopback set failed in fw\n"); return -EIO; } return ret; }
-static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, +static int hclge_set_common_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { int ret;
- ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode); + ret = hclge_cfg_common_loopback(hdev, en, loop_mode); if (ret) return ret;
@@@ -7770,12 -7242,8 +7769,12 @@@ static int hclge_set_phy_loopback(struc struct phy_device *phydev = hdev->hw.mac.phydev; int ret;
- if (!phydev) + if (!phydev) { + if (hnae3_dev_phy_imp_supported(hdev)) + return hclge_set_common_loopback(hdev, en, + HNAE3_LOOP_PHY); return -ENOTSUPP; + }
if (en) ret = hclge_enable_phy_loopback(hdev, phydev); @@@ -7797,12 -7265,13 +7796,12 @@@ return ret; }
-static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) +static int hclge_tqp_enable_cmd_send(struct hclge_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclge_desc desc; struct hclge_cfg_com_tqp_queue_cmd *req = (struct hclge_cfg_com_tqp_queue_cmd *)desc.data; - int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); req->tqp_id = cpu_to_le16(tqp_id); @@@ -7810,30 -7279,20 +7809,30 @@@ if (enable) req->enable |= 1U << HCLGE_TQP_ENABLE_B;
- ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) - dev_err(&hdev->pdev->dev, - "Tqp enable fail, status =%d.\n", ret); - return ret; + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + +static int hclge_tqp_enable(struct hnae3_handle *handle, bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclge_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + return 0; }
static int hclge_set_loopback(struct hnae3_handle *handle, enum hnae3_loop loop_mode, bool en) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hnae3_knic_private_info *kinfo; struct hclge_dev *hdev = vport->back; - int i, ret; + int ret;
/* Loopback can be enabled in three places: SSU, MAC, and serdes. By * default, SSU loopback is enabled, so if the SMAC and the DMAC are @@@ -7855,7 -7314,7 +7854,7 @@@ break; case HNAE3_LOOP_SERIAL_SERDES: case HNAE3_LOOP_PARALLEL_SERDES: - ret = hclge_set_serdes_loopback(hdev, en, loop_mode); + ret = hclge_set_common_loopback(hdev, en, loop_mode); break; case HNAE3_LOOP_PHY: ret = hclge_set_phy_loopback(hdev, en); @@@ -7870,12 -7329,14 +7869,12 @@@ if (ret) return ret;
- kinfo = &vport->nic.kinfo; - for (i = 0; i < kinfo->num_tqps; i++) { - ret = hclge_tqp_enable(hdev, i, 0, en); - if (ret) - return ret; - } + ret = hclge_tqp_enable(handle, en); + if (ret) + dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n", + en ? "enable" : "disable", ret);
- return 0; + return ret; }
static int hclge_set_default_loopback(struct hclge_dev *hdev) @@@ -7886,11 -7347,11 +7885,11 @@@ if (ret) return ret;
- ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); + ret = hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); if (ret) return ret;
- return hclge_cfg_serdes_loopback(hdev, false, + return hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_PARALLEL_SERDES); }
@@@ -7962,10 -7423,11 +7961,10 @@@ static void hclge_ae_stop(struct hnae3_ { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int i;
set_bit(HCLGE_STATE_DOWN, &hdev->state); spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); + hclge_clear_arfs_rules(hdev); spin_unlock_bh(&hdev->fd_rule_lock);
/* If it is not PF reset, the firmware will disable the MAC, @@@ -7978,7 -7440,8 +7977,7 @@@ return; }
- for (i = 0; i < handle->kinfo.num_tqps; i++) - hclge_reset_tqp(handle, i); + hclge_reset_tqp(handle);
hclge_config_mac_tnl_int(hdev, false);
@@@ -8428,7 -7891,7 +8427,7 @@@ int hclge_update_mac_list(struct hclge_
/* if the mac addr is already in the mac list, no need to add a new * one into it, just check the mac addr state, convert it to a new - * new state, or just remove it, or do nothing. + * state, or just remove it, or do nothing. */ mac_node = hclge_find_mac_node(list, addr); if (mac_node) { @@@ -8617,6 -8080,7 +8616,6 @@@ int hclge_add_mc_addr_common(struct hcl if (status) return status; status = hclge_add_mac_vlan_tbl(vport, &req, desc); - /* if already overflow, not to print each time */ if (status == -ENOSPC && !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE)) @@@ -8665,6 -8129,7 +8664,6 @@@ int hclge_rm_mc_addr_common(struct hclg else /* Not all the vfid is zero, update the vfid */ status = hclge_add_mac_vlan_tbl(vport, &req, desc); - } else if (status == -ENOENT) { status = 0; } @@@ -9099,7 -8564,7 +9098,7 @@@ static bool hclge_check_vf_mac_exist(st return true;
vf_idx += HCLGE_VF_VPORT_START_NUM; - for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) if (i != vf_idx && ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) return true; @@@ -9293,29 -8758,6 +9292,29 @@@ static int hclge_set_mac_addr(struct hn return 0; }
+static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) +{ + struct mii_ioctl_data *data = if_mii(ifr); + + if (!hnae3_dev_phy_imp_supported(hdev)) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = hdev->hw.mac.phy_addr; + /* this command reads phy id and register at the same time */ + fallthrough; + case SIOCGMIIREG: + data->val_out = hclge_read_phy_reg(hdev, data->reg_num); + return 0; + + case SIOCSMIIREG: + return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); + default: + return -EOPNOTSUPP; + } +} + static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr, int cmd) { @@@ -9323,7 -8765,7 +9322,7 @@@ struct hclge_dev *hdev = vport->back;
if (!hdev->hw.mac.phydev) - return -EOPNOTSUPP; + return hclge_mii_ioctl(hdev, ifr, cmd);
return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd); } @@@ -9480,7 -8922,8 +9479,7 @@@ static int hclge_check_vf_vlan_cmd_stat }
static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, - bool is_kill, u16 vlan, - __be16 proto) + bool is_kill, u16 vlan) { struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_desc desc[2]; @@@ -9546,7 -8989,8 +9545,7 @@@ static int hclge_set_vlan_filter_hw(str if (is_kill && !vlan_id) return 0;
- ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id, - proto); + ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id); if (ret) { dev_err(&hdev->pdev->dev, "Set %u vport vlan filter config fail, ret =%d.\n", @@@ -9996,7 -9440,7 +9995,7 @@@ static void hclge_restore_hw_table(stru hclge_restore_mac_table_common(vport); hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); - + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); }
@@@ -10352,7 -9796,7 +10351,7 @@@ out return ret; }
-static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, +static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id, bool enable) { struct hclge_reset_tqp_queue_cmd *req; @@@ -10408,114 -9852,94 +10407,114 @@@ u16 hclge_covert_handle_qid_global(stru return tqp->index; }
-int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclge_reset_tqp_cmd(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; + u16 reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + u16 i;
- queue_gid = hclge_covert_handle_qid_global(handle, queue_id); - - ret = hclge_tqp_enable(hdev, queue_id, 0, false); - if (ret) { - dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret); - return ret; - } + for (i = 0; i < handle->kinfo.num_tqps; i++) { + queue_gid = hclge_covert_handle_qid_global(handle, i); + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to send reset tqp cmd, ret = %d\n", + ret); + return ret; + }
- ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); - if (ret) { - dev_err(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return ret; - } + while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { + reset_status = hclge_get_reset_status(hdev, queue_gid); + if (reset_status) + break;
- while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + /* Wait for tqp hw reset */ + usleep_range(1000, 1200); + }
- /* Wait for tqp hw reset */ - usleep_range(1000, 1200); - } + if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { + dev_err(&hdev->pdev->dev, + "wait for tqp hw reset timeout\n"); + return -ETIME; + }
- if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_err(&hdev->pdev->dev, "Reset TQP fail\n"); - return ret; + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to deassert soft reset, ret = %d\n", + ret); + return ret; + } + reset_try_times = 0; } - - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_err(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); - - return ret; + return 0; }
-void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) +static int hclge_reset_rcb(struct hnae3_handle *handle) { - struct hnae3_handle *handle = &vport->nic; +#define HCLGE_RESET_RCB_NOT_SUPPORT 0U +#define HCLGE_RESET_RCB_SUCCESS 1U + + struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; - int reset_status; + struct hclge_reset_cmd *req; + struct hclge_desc desc; + u8 return_status; u16 queue_gid; int ret;
- if (queue_id >= handle->kinfo.num_tqps) { - dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", - queue_id); - return; - } + queue_gid = hclge_covert_handle_qid_global(handle, 0);
- queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); + req = (struct hclge_reset_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false); + hnae3_set_bit(req->fun_reset_rcb, HCLGE_CFG_RESET_RCB_B, 1); + req->fun_reset_rcb_vqid_start = cpu_to_le16(queue_gid); + req->fun_reset_rcb_vqid_num = cpu_to_le16(handle->kinfo.num_tqps);
- ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { - dev_warn(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return; + dev_err(&hdev->pdev->dev, + "failed to send rcb reset cmd, ret = %d\n", ret); + return ret; }
- while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + return_status = req->fun_reset_rcb_return_status; + if (return_status == HCLGE_RESET_RCB_SUCCESS) + return 0;
- /* Wait for tqp hw reset */ - usleep_range(1000, 1200); + if (return_status != HCLGE_RESET_RCB_NOT_SUPPORT) { + dev_err(&hdev->pdev->dev, "failed to reset rcb, ret = %u\n", + return_status); + return -EIO; }
- if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_warn(&hdev->pdev->dev, "Reset TQP fail\n"); - return; + /* if reset rcb cmd is unsupported, we need to send reset tqp cmd + * again to reset all tqps + */ + return hclge_reset_tqp_cmd(handle); +} + +int hclge_reset_tqp(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + /* only need to disable PF's tqp */ + if (!vport->vport_id) { + ret = hclge_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to disable tqp, ret = %d\n", ret); + return ret; + } }
- ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_warn(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); + return hclge_reset_rcb(handle); }
static u32 hclge_get_fw_version(struct hnae3_handle *handle) @@@ -10588,10 -10012,9 +10587,10 @@@ static void hclge_get_pauseparam(struc { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct phy_device *phydev = hdev->hw.mac.phydev; + u8 media_type = hdev->hw.mac.media_type;
- *auto_neg = phydev ? hclge_get_autoneg(handle) : 0; + *auto_neg = (media_type == HNAE3_MEDIA_TYPE_COPPER) ? + hclge_get_autoneg(handle) : 0;
if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { *rx_en = 0; @@@ -10637,7 -10060,7 +10636,7 @@@ static int hclge_set_pauseparam(struct struct phy_device *phydev = hdev->hw.mac.phydev; u32 fc_autoneg;
- if (phydev) { + if (phydev || hnae3_dev_phy_imp_supported(hdev)) { fc_autoneg = hclge_get_autoneg(handle); if (auto_neg != fc_autoneg) { dev_info(&hdev->pdev->dev, @@@ -10656,7 -10079,7 +10655,7 @@@
hclge_record_user_pauseparam(hdev, rx_en, tx_en);
- if (!auto_neg) + if (!auto_neg || hnae3_dev_phy_imp_supported(hdev)) return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
if (phydev) @@@ -10758,6 -10181,7 +10757,6 @@@ static void hclge_info_show(struct hclg dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc); dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc); dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport); - dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport); dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs); dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map); dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size); @@@ -10872,35 -10296,39 +10871,35 @@@ static int hclge_init_client_instance(s struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i, ret; - - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; + struct hclge_vport *vport = &hdev->vport[0]; + int ret;
- switch (client->type) { - case HNAE3_CLIENT_KNIC: - hdev->nic_client = client; - vport->nic.client = client; - ret = hclge_init_nic_client_instance(ae_dev, vport); - if (ret) - goto clear_nic; + switch (client->type) { + case HNAE3_CLIENT_KNIC: + hdev->nic_client = client; + vport->nic.client = client; + ret = hclge_init_nic_client_instance(ae_dev, vport); + if (ret) + goto clear_nic;
- ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce;
- break; - case HNAE3_CLIENT_ROCE: - if (hnae3_dev_roce_supported(hdev)) { - hdev->roce_client = client; - vport->roce.client = client; - } + break; + case HNAE3_CLIENT_ROCE: + if (hnae3_dev_roce_supported(hdev)) { + hdev->roce_client = client; + vport->roce.client = client; + }
- ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce;
- break; - default: - return -EINVAL; - } + break; + default: + return -EINVAL; }
return 0; @@@ -10919,27 -10347,32 +10918,27 @@@ static void hclge_uninit_client_instanc struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i; + struct hclge_vport *vport = &hdev->vport[0];
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; - if (hdev->roce_client) { - clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - hdev->roce_client->ops->uninit_instance(&vport->roce, - 0); - hdev->roce_client = NULL; - vport->roce.client = NULL; - } - if (client->type == HNAE3_CLIENT_ROCE) - return; - if (hdev->nic_client && client->ops->uninit_instance) { - clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - client->ops->uninit_instance(&vport->nic, 0); - hdev->nic_client = NULL; - vport->nic.client = NULL; - } + if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + hdev->roce_client = NULL; + vport->roce.client = NULL; + } + if (client->type == HNAE3_CLIENT_ROCE) + return; + if (hdev->nic_client && client->ops->uninit_instance) { + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + client->ops->uninit_instance(&vport->nic, 0); + hdev->nic_client = NULL; + vport->nic.client = NULL; } }
@@@ -11058,11 -10491,10 +11057,11 @@@ static void hclge_state_uninit(struct h cancel_delayed_work_sync(&hdev->service_task); }
-static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) +static void hclge_reset_prepare_general(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type) { -#define HCLGE_FLR_RETRY_WAIT_MS 500 -#define HCLGE_FLR_RETRY_CNT 5 +#define HCLGE_RESET_RETRY_WAIT_MS 500 +#define HCLGE_RESET_RETRY_CNT 5
struct hclge_dev *hdev = ae_dev->priv; int retry_cnt = 0; @@@ -11071,34 -10503,30 +11070,34 @@@ retry: down(&hdev->reset_sem); set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - hdev->reset_type = HNAE3_FLR_RESET; + hdev->reset_type = rst_type; ret = hclge_reset_prepare(hdev); if (ret || hdev->reset_pending) { - dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n", ret); if (hdev->reset_pending || - retry_cnt++ < HCLGE_FLR_RETRY_CNT) { + retry_cnt++ < HCLGE_RESET_RETRY_CNT) { dev_err(&hdev->pdev->dev, "reset_pending:0x%lx, retry_cnt:%d\n", hdev->reset_pending, retry_cnt); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); up(&hdev->reset_sem); - msleep(HCLGE_FLR_RETRY_WAIT_MS); + msleep(HCLGE_RESET_RETRY_WAIT_MS); goto retry; } }
- /* disable misc vector before FLR done */ + /* disable misc vector before reset done */ hclge_enable_vector(&hdev->misc_vector, false); set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - hdev->rst_stats.flr_rst_cnt++; + + if (hdev->reset_type == HNAE3_FLR_RESET) + hdev->rst_stats.flr_rst_cnt++; + else if (hdev->reset_type == HNAE3_FUNC_RESET) + hdev->rst_stats.pf_rst_cnt++; }
-static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) +static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; int ret; @@@ -11209,8 -10637,7 +11208,8 @@@ static int hclge_init_ae_dev(struct hna if (ret) goto err_msi_irq_uninit;
- if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER && + !hnae3_dev_phy_imp_supported(hdev)) { ret = hclge_mac_mdio_config(hdev); if (ret) goto err_msi_irq_uninit; @@@ -11603,13 -11030,6 +11602,13 @@@ static int hclge_reset_ae_dev(struct hn return ret; }
+ ret = hclge_tp_port_init(hdev); + if (ret) { + dev_err(&pdev->dev, "failed to init tp port, ret = %d\n", + ret); + return ret; + } + ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX); if (ret) { dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret); @@@ -11700,7 -11120,6 +11699,7 @@@ static void hclge_uninit_ae_dev(struct hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); hclge_uninit_mac_table(hdev); + hclge_del_all_fd_entries(hdev);
if (mac->phydev) mdiobus_unregister(mac->mdio_bus); @@@ -11791,7 -11210,7 +11790,7 @@@ static int hclge_set_channels(struct hn if (ret) return ret;
- /* RSS indirection table has been configuared by user */ + /* RSS indirection table has been configured by user */ if (rxfh_configured) goto out;
@@@ -11960,6 -11379,7 +11959,6 @@@ static int hclge_get_64_bit_regs(struc #define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) #define REG_SEPARATOR_LINE 1 #define REG_NUM_REMAIN_MASK 3 -#define BD_LIST_MAX_NUM 30
int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) { @@@ -12053,19 -11473,15 +12052,19 @@@ static int hclge_get_dfx_reg_len(struc { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int data_len_per_desc, bd_num, i; - int bd_num_list[BD_LIST_MAX_NUM]; + int *bd_num_list; u32 data_len; int ret;
+ bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; }
data_len_per_desc = sizeof_field(struct hclge_desc, data); @@@ -12076,8 -11492,6 +12075,8 @@@ *len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE; }
+out: + kfree(bd_num_list); return ret; }
@@@ -12085,20 -11499,16 +12084,20 @@@ static int hclge_get_dfx_reg(struct hcl { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int bd_num, bd_num_max, buf_len, i; - int bd_num_list[BD_LIST_MAX_NUM]; struct hclge_desc *desc_src; + int *bd_num_list; u32 *reg = data; int ret;
+ bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; }
bd_num_max = bd_num_list[0]; @@@ -12107,10 -11517,8 +12106,10 @@@
buf_len = sizeof(*desc_src) * bd_num_max; desc_src = kzalloc(buf_len, GFP_KERNEL); - if (!desc_src) - return -ENOMEM; + if (!desc_src) { + ret = -ENOMEM; + goto out; + }
for (i = 0; i < dfx_reg_type_num; i++) { bd_num = bd_num_list[i]; @@@ -12126,8 -11534,6 +12125,8 @@@ }
kfree(desc_src); +out: + kfree(bd_num_list); return ret; }
@@@ -12471,8 -11877,8 +12470,8 @@@ static int hclge_get_module_eeprom(stru static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, - .flr_prepare = hclge_flr_prepare, - .flr_done = hclge_flr_done, + .reset_prepare = hclge_reset_prepare_general, + .reset_done = hclge_reset_done, .init_client_instance = hclge_init_client_instance, .uninit_client_instance = hclge_uninit_client_instance, .map_ring_to_vector = hclge_map_ring_to_vector, @@@ -12537,6 -11943,7 +12536,6 @@@ .get_link_mode = hclge_get_link_mode, .add_fd_entry = hclge_add_fd_entry, .del_fd_entry = hclge_del_fd_entry, - .del_all_fd_entries = hclge_del_all_fd_entries, .get_fd_rule_cnt = hclge_get_fd_rule_cnt, .get_fd_rule_info = hclge_get_fd_rule_info, .get_fd_all_rules = hclge_get_all_rules, @@@ -12564,8 -11971,6 +12563,8 @@@ .add_cls_flower = hclge_add_cls_flower, .del_cls_flower = hclge_del_cls_flower, .cls_flower_active = hclge_is_cls_flower_active, + .get_phy_link_ksettings = hclge_get_phy_link_ksettings, + .set_phy_link_ksettings = hclge_set_phy_link_ksettings, };
static struct hnae3_ae_algo ae_algo = { diff --combined drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index c7d5c1726499,e295d359e912..07aa26ba0966 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@@ -497,6 -497,7 +497,6 @@@ void hclgevf_update_link_status(struct
link_state = test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state; - if (link_state != hdev->hw.mac.link) { client->ops->link_status_change(handle, !!link_state); if (rclient && rclient->ops->link_status_change) @@@ -706,9 -707,6 +706,9 @@@ static int hclgevf_set_rss_tc_mode(stru (tc_valid[i] & 0x1)); hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M, HCLGEVF_RSS_TC_SIZE_S, tc_size[i]); + hnae3_set_bit(mode, HCLGEVF_RSS_TC_SIZE_MSB_B, + tc_size[i] >> HCLGEVF_RSS_TC_SIZE_MSB_OFFSET & + 0x1); hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M, HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
@@@ -1243,11 -1241,12 +1243,11 @@@ static void hclgevf_sync_promisc_mode(s } }
-static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) +static int hclgevf_tqp_enable_cmd_send(struct hclgevf_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclgevf_cfg_com_tqp_queue_cmd *req; struct hclgevf_desc desc; - int status;
req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data;
@@@ -1258,22 -1257,12 +1258,22 @@@ if (enable) req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;
- status = hclgevf_cmd_send(&hdev->hw, &desc, 1); - if (status) - dev_err(&hdev->pdev->dev, - "TQP enable fail, status =%d.\n", status); + return hclgevf_cmd_send(&hdev->hw, &desc, 1); +}
- return status; +static int hclgevf_tqp_enable(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclgevf_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + + return 0; }
static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) @@@ -1722,39 -1711,20 +1722,39 @@@ static int hclgevf_en_hw_strip_rxvtag(s return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); }
-static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclgevf_reset_tqp(struct hnae3_handle *handle) { +#define HCLGEVF_RESET_ALL_QUEUE_DONE 1U struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; + u8 return_status = 0; int ret; + u16 i;
/* disable vf queue before send queue reset msg to PF */ - ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); - if (ret) + ret = hclgevf_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, "failed to disable tqp, ret = %d\n", + ret); return ret; + }
hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); - memcpy(send_msg.data, &queue_id, sizeof(queue_id)); - return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &return_status, + sizeof(return_status)); + if (ret || return_status == HCLGEVF_RESET_ALL_QUEUE_DONE) + return ret; + + for (i = 1; i < handle->kinfo.num_tqps; i++) { + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); + memcpy(send_msg.data, &i, sizeof(i)); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + if (ret) + return ret; + } + + return 0; }
static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) @@@ -2114,11 -2084,10 +2114,11 @@@ static void hclgevf_enable_vector(struc writel(en ? 1 : 0, vector->addr); }
-static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) +static void hclgevf_reset_prepare_general(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type) { -#define HCLGEVF_FLR_RETRY_WAIT_MS 500 -#define HCLGEVF_FLR_RETRY_CNT 5 +#define HCLGEVF_RESET_RETRY_WAIT_MS 500 +#define HCLGEVF_RESET_RETRY_CNT 5
struct hclgevf_dev *hdev = ae_dev->priv; int retry_cnt = 0; @@@ -2127,31 -2096,29 +2127,31 @@@ retry: down(&hdev->reset_sem); set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - hdev->reset_type = HNAE3_FLR_RESET; + hdev->reset_type = rst_type; ret = hclgevf_reset_prepare(hdev); if (ret) { - dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n", ret); if (hdev->reset_pending || - retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) { + retry_cnt++ < HCLGEVF_RESET_RETRY_CNT) { dev_err(&hdev->pdev->dev, "reset_pending:0x%lx, retry_cnt:%d\n", hdev->reset_pending, retry_cnt); clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); up(&hdev->reset_sem); - msleep(HCLGEVF_FLR_RETRY_WAIT_MS); + msleep(HCLGEVF_RESET_RETRY_WAIT_MS); goto retry; } }
- /* disable misc vector before FLR done */ + /* disable misc vector before reset done */ hclgevf_enable_vector(&hdev->misc_vector, false); - hdev->rst_stats.flr_rst_cnt++; + + if (hdev->reset_type == HNAE3_FLR_RESET) + hdev->rst_stats.flr_rst_cnt++; }
-static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) +static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) { struct hclgevf_dev *hdev = ae_dev->priv; int ret; @@@ -2226,7 -2193,7 +2226,7 @@@ static void hclgevf_reset_service_task(
if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { - /* PF has initmated that it is about to reset the hardware. + /* PF has intimated that it is about to reset the hardware. * We now have to poll & check if hardware has actually * completed the reset sequence. On hardware reset completion, * VF needs to reset the client and ae device. @@@ -2389,6 -2356,7 +2389,6 @@@ static enum hclgevf_evt_cause hclgevf_c /* fetch the events from their corresponding regs */ cmdq_stat_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG); - if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) { rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); dev_info(&hdev->pdev->dev, @@@ -2656,25 -2624,28 +2656,25 @@@ static int hclgevf_ae_start(struct hnae { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); + hclgevf_reset_tqp_stats(handle);
hclgevf_request_link_info(hdev);
hclgevf_update_link_mode(hdev);
- clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); - return 0; }
static void hclgevf_ae_stop(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - int i;
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
if (hdev->reset_type != HNAE3_VF_RESET) - for (i = 0; i < handle->kinfo.num_tqps; i++) - if (hclgevf_reset_tqp(handle, i)) - break; + hclgevf_reset_tqp(handle);
hclgevf_reset_tqp_stats(handle); hclgevf_update_link_status(hdev, 0); @@@ -3526,7 -3497,7 +3526,7 @@@ static int hclgevf_set_channels(struct if (ret) return ret;
- /* RSS indirection table has been configuared by user */ + /* RSS indirection table has been configured by user */ if (rxfh_configured) goto out;
@@@ -3751,8 -3722,8 +3751,8 @@@ void hclgevf_update_port_base_vlan_info static const struct hnae3_ae_ops hclgevf_ops = { .init_ae_dev = hclgevf_init_ae_dev, .uninit_ae_dev = hclgevf_uninit_ae_dev, - .flr_prepare = hclgevf_flr_prepare, - .flr_done = hclgevf_flr_done, + .reset_prepare = hclgevf_reset_prepare_general, + .reset_done = hclgevf_reset_done, .init_client_instance = hclgevf_init_client_instance, .uninit_client_instance = hclgevf_uninit_client_instance, .start = hclgevf_ae_start, diff --combined drivers/net/ethernet/intel/i40e/i40e_debugfs.c index e8230da29f05,d627b59ad446..291e61ac3e44 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@@ -578,6 -578,9 +578,9 @@@ static void i40e_dbg_dump_desc(int cnt case RING_TYPE_XDP: ring = kmemdup(vsi->xdp_rings[ring_id], sizeof(*ring), GFP_KERNEL); break; + default: + ring = NULL; + break; } if (!ring) return; @@@ -651,7 -654,7 +654,7 @@@ static void i40e_dbg_dump_vsi_no_seid(s }
/** - * i40e_dbg_dump_stats - handles dump stats write into command datum + * i40e_dbg_dump_eth_stats - handles dump stats write into command datum * @pf: the i40e_pf created in command write * @estats: the eth stats structure to be dumped **/ @@@ -1638,7 -1641,7 +1641,7 @@@ static const struct file_operations i40 static char i40e_dbg_netdev_ops_buf[256] = "";
/** - * i40e_dbg_netdev_ops - read for netdev_ops datum + * i40e_dbg_netdev_ops_read - read for netdev_ops datum * @filp: the opened file * @buffer: where to write the data for the user to read * @count: the size of the user's buffer diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c4c167650b6b,0e92668012e3..040a01400b85 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@@ -212,7 -212,7 +212,7 @@@ static void __i40e_add_stat_strings(u8 }
/** - * 40e_add_stat_strings - copy stat strings into ethtool buffer + * i40e_add_stat_strings - copy stat strings into ethtool buffer * @p: ethtool supplied buffer * @stats: stat definitions array * @@@ -232,6 -232,8 +232,8 @@@ I40E_STAT(struct i40e_vsi, _name, _stat) #define I40E_VEB_STAT(_name, _stat) \ I40E_STAT(struct i40e_veb, _name, _stat) + #define I40E_VEB_TC_STAT(_name, _stat) \ + I40E_STAT(struct i40e_cp_veb_tc_stats, _name, _stat) #define I40E_PFC_STAT(_name, _stat) \ I40E_STAT(struct i40e_pfc_stats, _name, _stat) #define I40E_QUEUE_STAT(_name, _stat) \ @@@ -266,11 -268,18 +268,18 @@@ static const struct i40e_stats i40e_gst I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol), };
+ struct i40e_cp_veb_tc_stats { + u64 tc_rx_packets; + u64 tc_rx_bytes; + u64 tc_tx_packets; + u64 tc_tx_bytes; + }; + static const struct i40e_stats i40e_gstrings_veb_tc_stats[] = { - I40E_VEB_STAT("veb.tc_%u_tx_packets", tc_stats.tc_tx_packets), - I40E_VEB_STAT("veb.tc_%u_tx_bytes", tc_stats.tc_tx_bytes), - I40E_VEB_STAT("veb.tc_%u_rx_packets", tc_stats.tc_rx_packets), - I40E_VEB_STAT("veb.tc_%u_rx_bytes", tc_stats.tc_rx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_tx_packets", tc_tx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_tx_bytes", tc_tx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_rx_packets", tc_rx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_rx_bytes", tc_rx_bytes), };
static const struct i40e_stats i40e_gstrings_misc_stats[] = { @@@ -1101,6 -1110,7 +1110,7 @@@ static int i40e_get_link_ksettings(stru
/* Set flow control settings */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause);
switch (hw->fc.requested_mode) { case I40E_FC_FULL: @@@ -2216,6 -2226,29 +2226,29 @@@ static int i40e_get_sset_count(struct n } }
+ /** + * i40e_get_veb_tc_stats - copy VEB TC statistics to formatted structure + * @tc: the TC statistics in VEB structure (veb->tc_stats) + * @i: the index of traffic class in (veb->tc_stats) structure to copy + * + * Copy VEB TC statistics from structure of arrays (veb->tc_stats) to + * one dimensional structure i40e_cp_veb_tc_stats. + * Produce formatted i40e_cp_veb_tc_stats structure of the VEB TC + * statistics for the given TC. + **/ + static struct i40e_cp_veb_tc_stats + i40e_get_veb_tc_stats(struct i40e_veb_tc_stats *tc, unsigned int i) + { + struct i40e_cp_veb_tc_stats veb_tc = { + .tc_rx_packets = tc->tc_rx_packets[i], + .tc_rx_bytes = tc->tc_rx_bytes[i], + .tc_tx_packets = tc->tc_tx_packets[i], + .tc_tx_bytes = tc->tc_tx_bytes[i], + }; + + return veb_tc; + } + /** * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure * @pf: the PF device structure @@@ -2300,8 -2333,16 +2333,16 @@@ static void i40e_get_ethtool_stats(stru i40e_gstrings_veb_stats);
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) - i40e_add_ethtool_stats(&data, veb_stats ? veb : NULL, - i40e_gstrings_veb_tc_stats); + if (veb_stats) { + struct i40e_cp_veb_tc_stats veb_tc = + i40e_get_veb_tc_stats(&veb->tc_stats, i); + + i40e_add_ethtool_stats(&data, &veb_tc, + i40e_gstrings_veb_tc_stats); + } else { + i40e_add_ethtool_stats(&data, NULL, + i40e_gstrings_veb_tc_stats); + }
i40e_add_ethtool_stats(&data, pf, i40e_gstrings_stats);
@@@ -2368,15 -2409,21 +2409,15 @@@ static void i40e_get_priv_flag_strings( struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - char *p = (char *)data; unsigned int i; + u8 *p = data;
- for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - i40e_gstrings_priv_flags[i].flag_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) + ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string); if (pf->hw.pf_id != 0) return; - for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - i40e_gl_gstrings_priv_flags[i].flag_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) + ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string); }
static void i40e_get_strings(struct net_device *netdev, u32 stringset, @@@ -5433,7 -5480,7 +5474,7 @@@ static int i40e_get_module_eeprom(struc
status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - true, addr, offset, &value, NULL); + addr, true, offset, &value, NULL); if (status) return -EIO; data[i] = value; diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c index 1555d6009bf5,30ad7c08d0fb..9502e043a0b7 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@@ -2023,7 -2023,7 +2023,7 @@@ static void i40e_undo_add_filter_entrie }
/** - * i40e_next_entry - Get the next non-broadcast filter from a list + * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we @@@ -2560,8 -2560,7 +2560,7 @@@ int i40e_sync_vsi_filters(struct i40e_v i40e_stat_str(hw, aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { - dev_info(&pf->pdev->dev, "%s is %s allmulti mode.\n", - vsi->netdev->name, + dev_info(&pf->pdev->dev, "%s allmulti mode.\n", cur_multipromisc ? "entering" : "leaving"); } } @@@ -5204,7 -5203,7 +5203,7 @@@ static u8 i40e_pf_get_num_tc(struct i40 }
/** - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. @@@ -6738,9 -6737,9 +6737,9 @@@ out set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } - /* registers are set, lets apply */ - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) - ret = i40e_hw_set_dcb_config(pf, new_cfg); + /* registers are set, lets apply */ + if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + ret = i40e_hw_set_dcb_config(pf, new_cfg); }
err: @@@ -7339,7 -7338,7 +7338,7 @@@ static void i40e_vsi_set_default_tc_con qcount = min_t(int, vsi->alloc_queue_pairs, i40e_pf_get_max_q_per_tc(vsi->back)); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - /* For the TC that is not enabled set the offset to to default + /* For the TC that is not enabled set the offset to default * queue and allocate one queue for the given TC. */ vsi->tc_config.tc_info[i].qoffset = 0; @@@ -9467,7 -9466,7 +9466,7 @@@ static void i40e_fdir_flush_and_replay( }
/** - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) @@@ -10573,12 -10572,6 +10572,6 @@@ static void i40e_rebuild(struct i40e_p goto end_core_reset; }
- if (!lock_acquired) - rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); - if (ret) - goto end_unlock; - #ifdef CONFIG_I40E_DCB /* Enable FW to write a default DCB config on link-up * unless I40E_FLAG_TC_MQPRIO was enabled or DCB @@@ -10593,7 -10586,7 +10586,7 @@@ i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); @@@ -10607,6 -10600,11 +10600,11 @@@ }
#endif /* CONFIG_I40E_DCB */ + if (!lock_acquired) + rtnl_lock(); + ret = i40e_setup_pf_switch(pf, reinit); + if (ret) + goto end_unlock;
/* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. @@@ -10625,7 -10623,7 +10623,7 @@@ * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try - * try to recover minimal use by getting the basic PF VSI working. + * to recover minimal use by getting the basic PF VSI working. */ if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); @@@ -15140,12 -15138,16 +15138,16 @@@ static int i40e_init_recovery_mode(stru * in order to register the netdev */ v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN); - if (v_idx < 0) + if (v_idx < 0) { + err = v_idx; goto err_switch_setup; + } pf->lan_vsi = v_idx; vsi = pf->vsi[v_idx]; - if (!vsi) + if (!vsi) { + err = -EFAULT; goto err_switch_setup; + } vsi->alloc_queue_pairs = 1; err = i40e_config_netdev(vsi); if (err) diff --combined drivers/net/ethernet/intel/i40e/i40e_txrx.c index fc20afc23bfa,06b4271219b1..121cd99fdeff --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@@ -2295,8 -2295,7 +2295,7 @@@ int i40e_xmit_xdp_tx_ring(struct xdp_bu * @rx_ring: Rx ring being processed * @xdp: XDP buffer containing the frame **/ - static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, - struct xdp_buff *xdp) + static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { int err, result = I40E_XDP_PASS; struct i40e_ring *xdp_ring; @@@ -2335,7 -2334,7 +2334,7 @@@ } xdp_out: rcu_read_unlock(); - return ERR_PTR(-result); + return result; }
/** @@@ -2448,6 -2447,7 +2447,7 @@@ static int i40e_clean_rx_irq(struct i40 unsigned int xdp_xmit = 0; bool failure = false; struct xdp_buff xdp; + int xdp_res = 0;
#if (PAGE_SIZE < 8192) frame_sz = i40e_rx_frame_truesize(rx_ring, 0); @@@ -2513,12 -2513,10 +2513,10 @@@ /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size); #endif - skb = i40e_run_xdp(rx_ring, &xdp); + xdp_res = i40e_run_xdp(rx_ring, &xdp); }
- if (IS_ERR(skb)) { - unsigned int xdp_res = -PTR_ERR(skb); - + if (xdp_res) { if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { xdp_xmit |= xdp_res; i40e_rx_buffer_flip(rx_ring, rx_buffer, size); @@@ -3333,7 -3331,7 +3331,7 @@@ static int i40e_tx_enable_csum(struct s }
/** - * i40e_create_tx_ctx Build the Tx context descriptor + * i40e_create_tx_ctx - Build the Tx context descriptor * @tx_ring: ring to create the descriptor on * @cd_type_cmd_tso_mss: Quad Word 1 * @cd_tunneling: Quad Word 0 - bits 0-31 @@@ -3835,8 -3833,8 +3833,8 @@@ netdev_tx_t i40e_lan_xmit_frame(struct * @frames: array of XDP buffer pointers * @flags: XDP extra info * - * Returns number of frames successfully sent. Frames that fail are - * free'ed via XDP return API. + * Returns number of frames successfully sent. Failed frames + * will be free'ed by XDP core. * * For error cases, a negative errno code is returned and no-frames * are transmitted (caller must handle freeing frames). @@@ -3849,7 -3847,7 +3847,7 @@@ int i40e_xdp_xmit(struct net_device *de struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *xdp_ring; - int drops = 0; + int nxmit = 0; int i;
if (test_bit(__I40E_VSI_DOWN, vsi->state)) @@@ -3869,13 -3867,14 +3867,13 @@@ int err;
err = i40e_xmit_xdp_ring(xdpf, xdp_ring); - if (err != I40E_XDP_TX) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err != I40E_XDP_TX) + break; + nxmit++; }
if (unlikely(flags & XDP_XMIT_FLUSH)) i40e_xdp_ring_update_tail(xdp_ring);
- return n - drops; + return nxmit; } diff --combined drivers/net/ethernet/intel/i40e/i40e_xsk.c index d89c22347d9d,12ca84113587..46d884417c63 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@@ -160,13 -160,6 +160,13 @@@ static int i40e_run_xdp_zc(struct i40e_ xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp);
+ if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@@ -174,6 -167,10 +174,6 @@@ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@@ -474,7 -471,7 +474,7 @@@ static bool i40e_xmit_zc(struct i40e_ri
nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); if (!nb_pkts) - return false; + return true;
if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; @@@ -491,7 -488,7 +491,7 @@@
i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
- return true; + return nb_pkts < budget; }
/** @@@ -628,7 -625,7 +628,7 @@@ void i40e_xsk_clean_rx_ring(struct i40e }
/** - * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown + * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown * @tx_ring: XDP Tx ring **/ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) diff --combined drivers/net/ethernet/intel/ice/ice.h index 721afa0f0a88,17101c45cbcd..07777ac4f098 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@@ -73,7 -73,7 +73,7 @@@ #define ICE_MIN_LAN_TXRX_MSIX 1 #define ICE_MIN_LAN_OICR_MSIX 1 #define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) -#define ICE_FDIR_MSIX 1 +#define ICE_FDIR_MSIX 2 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@@ -84,12 -84,9 +84,12 @@@ #define ICE_MAX_LG_RSS_QS 256 #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) +/* All VF control VSIs share the same IRQ, so assign a unique ID for them */ +#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_MISC_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_INVAL_VFID 256
+#define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ #define ICE_MAX_RESET_WAIT 20
#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@@ -193,13 -190,12 +193,12 @@@ struct ice_sw u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ };
-enum ice_state { +enum ice_pf_state { __ICE_TESTING, __ICE_DOWN, __ICE_NEEDS_RESTART, __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ - __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */ __ICE_PFR_REQ, /* set by driver and peers */ __ICE_CORER_REQ, /* set by driver and peers */ __ICE_GLOBR_REQ, /* set by driver and peers */ @@@ -232,18 -228,15 +231,18 @@@ __ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ __ICE_LINK_DEFAULT_OVERRIDE_PENDING, __ICE_PHY_INIT_COMPLETE, + __ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ __ICE_STATE_NBITS /* must be last */ };
-enum ice_vsi_flags { - ICE_VSI_FLAG_UMAC_FLTR_CHANGED, - ICE_VSI_FLAG_MMAC_FLTR_CHANGED, - ICE_VSI_FLAG_VLAN_FLTR_CHANGED, - ICE_VSI_FLAG_PROMISC_CHANGED, - ICE_VSI_FLAG_NBITS /* must be last */ +enum ice_vsi_state { + ICE_VSI_DOWN, + ICE_VSI_NEEDS_RESTART, + ICE_VSI_UMAC_FLTR_CHANGED, + ICE_VSI_MMAC_FLTR_CHANGED, + ICE_VSI_VLAN_FLTR_CHANGED, + ICE_VSI_PROMISC_CHANGED, + ICE_VSI_STATE_NBITS /* must be last */ };
/* struct that defines a VSI, associated with a dev */ @@@ -259,12 -252,14 +258,12 @@@ struct ice_vsi irqreturn_t (*irq_handler)(int irq, void *data);
u64 tx_linearize; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); - DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); + DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS); unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; u32 rx_buf_failed; u32 rx_page_failed; - u32 rx_gro_dropped; u16 num_q_vectors; u16 base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; @@@ -504,7 -499,7 +503,7 @@@ ice_irq_dynamic_ena(struct ice_hw *hw, val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr << GLINT_DYN_CTL_ITR_INDX_S); if (vsi) - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return; wr32(hw, GLINT_DYN_CTL(vector), val); } @@@ -621,16 -616,14 +620,16 @@@ int ice_destroy_xdp_rings(struct ice_vs int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed); +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); const char *ice_stat_str(enum ice_status stat_err); const char *ice_aq_str(enum ice_aq_err aq_err); - bool ice_is_wol_supported(struct ice_pf *pf); + bool ice_is_wol_supported(struct ice_hw *hw); int ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add, bool is_tun); @@@ -648,6 -641,7 +647,7 @@@ int ice_fdir_create_dflt_rules(struct i int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, struct ice_rq_event_info *event); int ice_open(struct net_device *netdev); + int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf);
diff --combined drivers/net/ethernet/intel/ice/ice_common.c index b13a630ea1b7,a20edf1538a0..e93b1e40f627 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@@ -158,10 -158,6 +158,10 @@@ ice_aq_get_phy_caps(struct ice_port_inf return ICE_ERR_PARAM; hw = pi->hw;
+ if (report_mode == ICE_AQC_REPORT_DFLT_CFG && + !ice_fw_supports_report_dflt_cfg(hw)) + return ICE_ERR_PARAM; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
if (qual_mods) @@@ -195,7 -191,7 +195,7 @@@ ice_debug(hw, ICE_DBG_LINK, " module_type[2] = 0x%x\n", pcaps->module_type[2]);
- if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) { + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) { pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high); memcpy(pi->phy.link_info.module_type, &pcaps->module_type, @@@ -721,8 -717,8 +721,8 @@@ static enum ice_status ice_cfg_fw_log(s
if (!data) { data = devm_kcalloc(ice_hw_to_dev(hw), - sizeof(*data), ICE_AQC_FW_LOG_ID_MAX, + sizeof(*data), GFP_KERNEL); if (!data) return ICE_ERR_NO_MEMORY; @@@ -926,8 -922,7 +926,8 @@@ enum ice_status ice_init_hw(struct ice_
/* Initialize port_info struct with PHY capabilities */ status = ice_aq_get_phy_caps(hw->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, + NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); if (status) dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n", @@@ -1297,85 -1292,6 +1297,85 @@@ const struct ice_ctx_ele ice_tlan_ctx_i */ DEFINE_MUTEX(ice_global_cfg_lock_sw);
+/** + * ice_should_retry_sq_send_cmd + * @opcode: AQ opcode + * + * Decide if we should retry the send command routine for the ATQ, depending + * on the opcode. + */ +static bool ice_should_retry_sq_send_cmd(u16 opcode) +{ + switch (opcode) { + case ice_aqc_opc_get_link_topo: + case ice_aqc_opc_lldp_stop: + case ice_aqc_opc_lldp_start: + case ice_aqc_opc_lldp_filter_ctrl: + return true; + } + + return false; +} + +/** + * ice_sq_send_cmd_retry - send command to Control Queue (ATQ) + * @hw: pointer to the HW struct + * @cq: pointer to the specific Control queue + * @desc: prefilled descriptor describing the command + * @buf: buffer to use for indirect commands (or NULL for direct commands) + * @buf_size: size of buffer for indirect commands (or 0 for direct commands) + * @cd: pointer to command details structure + * + * Retry sending the FW Admin Queue command, multiple times, to the FW Admin + * Queue if the EBUSY AQ error is returned. + */ +static enum ice_status +ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc_cpy; + enum ice_status status; + bool is_cmd_for_retry; + u8 *buf_cpy = NULL; + u8 idx = 0; + u16 opcode; + + opcode = le16_to_cpu(desc->opcode); + is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode); + memset(&desc_cpy, 0, sizeof(desc_cpy)); + + if (is_cmd_for_retry) { + if (buf) { + buf_cpy = kzalloc(buf_size, GFP_KERNEL); + if (!buf_cpy) + return ICE_ERR_NO_MEMORY; + } + + memcpy(&desc_cpy, desc, sizeof(desc_cpy)); + } + + do { + status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd); + + if (!is_cmd_for_retry || !status || + hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY) + break; + + if (buf_cpy) + memcpy(buf, buf_cpy, buf_size); + + memcpy(desc, &desc_cpy, sizeof(desc_cpy)); + + mdelay(ICE_SQ_SEND_DELAY_TIME_MS); + + } while (++idx < ICE_SQ_SEND_MAX_EXECUTE); + + kfree(buf_cpy); + + return status; +} + /** * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue * @hw: pointer to the HW struct @@@ -1417,7 -1333,7 +1417,7 @@@ ice_aq_send_cmd(struct ice_hw *hw, stru break; }
- status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); + status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd); if (lock_acquired) mutex_unlock(&ice_global_cfg_lock_sw);
@@@ -2739,7 -2655,7 +2739,7 @@@ enum ice_status ice_update_link_info(st if (!pcaps) return ICE_ERR_NO_MEMORY;
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL);
devm_kfree(ice_hw_to_dev(hw), pcaps); @@@ -2899,8 -2815,8 +2899,8 @@@ ice_set_fc(struct ice_port_info *pi, u return ICE_ERR_NO_MEMORY;
/* Get the current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, + pcaps, NULL); if (status) { *aq_failures = ICE_SET_FC_AQ_FAIL_GET; goto out; @@@ -3013,6 -2929,17 +3013,6 @@@ ice_copy_phy_caps_to_cfg(struct ice_por cfg->link_fec_opt = caps->link_fec_options; cfg->module_compliance_enforcement = caps->module_compliance_enforcement; - - if (ice_fw_supports_link_override(pi->hw)) { - struct ice_link_default_override_tlv tlv; - - if (ice_get_link_default_override(&tlv, pi)) - return; - - if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) - cfg->module_compliance_enforcement |= - ICE_LINK_OVERRIDE_STRICT_MODE; - } }
/** @@@ -3027,21 -2954,16 +3027,21 @@@ ice_cfg_phy_fec(struct ice_port_info *p { struct ice_aqc_get_phy_caps_data *pcaps; enum ice_status status; + struct ice_hw *hw;
if (!pi || !cfg) return ICE_ERR_BAD_PTR;
+ hw = pi->hw; + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return ICE_ERR_NO_MEMORY;
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, + (ice_fw_supports_report_dflt_cfg(hw) ? + ICE_AQC_REPORT_DFLT_CFG : + ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL); if (status) goto out;
@@@ -3080,8 -3002,7 +3080,8 @@@ break; }
- if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) { + if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) && + !ice_fw_supports_report_dflt_cfg(hw)) { struct ice_link_default_override_tlv tlv;
if (ice_get_link_default_override(&tlv, pi)) @@@ -3265,7 -3186,7 +3265,7 @@@ ice_aq_sff_eeprom(struct ice_hw *hw, u1
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom); cmd = &desc.params.read_write_sff_param; - desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF); + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); cmd->lport_num = (u8)(lport & 0xff); cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & @@@ -3285,33 -3206,23 +3285,33 @@@ /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_id: VSI FW index - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer - * @glob_lut_idx: global LUT index + * @params: RSS LUT parameters * @set: set true to set the table, false to get the table * * Internal function to get (0x0B05) or set (0x0B03) RSS look up table */ static enum ice_status -__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, - u16 lut_size, u8 glob_lut_idx, bool set) +__ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set) { + u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle; struct ice_aqc_get_set_rss_lut *cmd_resp; struct ice_aq_desc desc; enum ice_status status; - u16 flags = 0; + u8 *lut; + + if (!params) + return ICE_ERR_PARAM; + + vsi_handle = params->vsi_handle; + lut = params->lut; + + if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) + return ICE_ERR_PARAM; + + lut_size = params->lut_size; + lut_type = params->lut_type; + glob_lut_idx = params->global_lut_id; + vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
cmd_resp = &desc.params.get_set_rss_lut;
@@@ -3385,27 -3296,43 +3385,27 @@@ ice_aq_get_set_rss_lut_exit /** * ice_aq_get_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @get_params: RSS LUT parameters used to specify which RSS LUT to get * * get the RSS lookup table, PF or VSI type */ enum ice_status -ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) +ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, false); + return __ice_aq_get_set_rss_lut(hw, get_params, false); }
/** * ice_aq_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @set_params: RSS LUT parameters used to specify how to set the RSS LUT * * set the RSS lookup table, PF or VSI type */ enum ice_status -ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) +ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, true); + return __ice_aq_get_set_rss_lut(hw, set_params, true); }
/** @@@ -4446,7 -4373,7 +4446,7 @@@ ice_aq_set_lldp_mib(struct ice_hw *hw, }
/** - * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl + * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl * @hw: pointer to HW struct */ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) @@@ -4491,23 -4418,3 +4491,23 @@@ ice_lldp_fltr_add_remove(struct ice_hw
return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } + +/** + * ice_fw_supports_report_dflt_cfg + * @hw: pointer to the hardware structure + * + * Checks if the firmware supports report default configuration + */ +bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) +{ + if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) { + if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN) + return true; + if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN && + hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH) + return true; + } else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) { + return true; + } + return false; +} diff --combined drivers/net/ethernet/intel/ice/ice_controlq.h index 7d0905f25ddc,68866f4f0eb0..77c2307d4fb8 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@@ -14,8 -14,8 +14,8 @@@ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
#define ICE_CTL_Q_DESC_UNUSED(R) \ - (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) + ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1))
/* Defines that help manage the driver vs FW API checks. * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. @@@ -31,8 -31,8 +31,8 @@@ enum ice_ctl_q ICE_CTL_Q_MAILBOX, };
- /* Control Queue timeout settings - max delay 250ms */ - #define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ + /* Control Queue timeout settings - max delay 1s */ + #define ICE_CTL_Q_SQ_CMD_TIMEOUT 10000 /* Count 10000 times */ #define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ diff --combined drivers/net/ethernet/intel/ice/ice_dcb.c index 85c9eccfdae8,211ac6f907ad..43c6af42de8a --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@@ -738,22 -738,27 +738,27 @@@ ice_aq_get_cee_dcb_cfg(struct ice_hw *h /** * ice_cee_to_dcb_cfg * @cee_cfg: pointer to CEE configuration struct - * @dcbcfg: DCB configuration struct + * @pi: port information structure * * Convert CEE configuration from firmware to DCB configuration */ static void ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, - struct ice_dcbx_cfg *dcbcfg) + struct ice_port_info *pi) { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift; + u8 i, j, err, sync, oper, app_index, ice_app_sel_type; u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); - u8 i, err, sync, oper, app_index, ice_app_sel_type; u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift; + struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg; u16 ice_app_prot_id_type;
- /* CEE PG data to ETS config */ + dcbcfg = &pi->qos_cfg.local_dcbx_cfg; + dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE; + dcbcfg->tlv_status = tlv_status; + + /* CEE PG data */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
/* Note that the FW creates the oper_prio_tc nibbles reversed @@@ -780,10 -785,16 +785,16 @@@ } }
- /* CEE PFC data to ETS config */ + /* CEE PFC data */ dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en; dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+ /* CEE APP TLV data */ + if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING) + cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg; + else + cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg; + app_index = 0; for (i = 0; i < 3; i++) { if (i == 0) { @@@ -802,6 -813,18 +813,18 @@@ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S; ice_app_sel_type = ICE_APP_SEL_TCPIP; ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI; + + for (j = 0; j < cmp_dcbcfg->numapps; j++) { + u16 prot_id = cmp_dcbcfg->app[j].prot_id; + u8 sel = cmp_dcbcfg->app[j].selector; + + if (sel == ICE_APP_SEL_TCPIP && + (prot_id == ICE_APP_PROT_ID_ISCSI || + prot_id == ICE_APP_PROT_ID_ISCSI_860)) { + ice_app_prot_id_type = prot_id; + break; + } + } } else { /* FIP APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M; @@@ -834,7 -857,7 +857,7 @@@ }
/** - * ice_get_ieee_dcb_cfg + * ice_get_ieee_or_cee_dcb_cfg * @pi: port information structure * @dcbx_mode: mode of DCBX (IEEE or CEE) * @@@ -892,11 -915,8 +915,8 @@@ enum ice_status ice_get_dcb_cfg(struct ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL); if (!ret) { /* CEE mode */ - dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; - dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE; - dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status); - ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg); ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE); + ice_cee_to_dcb_cfg(&cee_cfg, pi); } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { /* CEE mode not enabled try querying IEEE data */ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; diff --combined drivers/net/ethernet/intel/ice/ice_ethtool.c index 51d3a929ecfd,32ba71a16165..a39e890100d9 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@@ -60,6 -60,7 +60,6 @@@ static const struct ice_stats ice_gstri ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), - ICE_VSI_STAT("rx_gro_dropped", rx_gro_dropped), ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), ICE_VSI_STAT("tx_linearize", tx_linearize), ICE_VSI_STAT("tx_busy", tx_busy), @@@ -870,47 -871,68 +870,47 @@@ static void ice_get_strings(struct net_ { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - char *p = (char *)data; unsigned int i; + u8 *p = data;
switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < ICE_VSI_STATS_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_vsi_stats[i].stat_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_VSI_STATS_LEN; i++) + ethtool_sprintf(&p, + ice_gstrings_vsi_stats[i].stat_string);
ice_for_each_alloc_txq(vsi, i) { - snprintf(p, ETH_GSTRING_LEN, - "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); }
ice_for_each_alloc_rxq(vsi, i) { - snprintf(p, ETH_GSTRING_LEN, - "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); }
if (vsi->type != ICE_VSI_PF) return;
- for (i = 0; i < ICE_PF_STATS_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_pf_stats[i].stat_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_PF_STATS_LEN; i++) + ethtool_sprintf(&p, + ice_gstrings_pf_stats[i].stat_string);
for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - snprintf(p, ETH_GSTRING_LEN, - "tx_priority_%u_xon.nic", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, - "tx_priority_%u_xoff.nic", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i); + ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i); } for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - snprintf(p, ETH_GSTRING_LEN, - "rx_priority_%u_xon.nic", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, - "rx_priority_%u_xoff.nic", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i); + ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i); } break; case ETH_SS_TEST: memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN); break; case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_priv_flags[i].name); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) + ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name); break; default: break; @@@ -1059,7 -1081,7 +1059,7 @@@ ice_get_fecparam(struct net_device *net if (!caps) return -ENOMEM;
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EAGAIN; @@@ -1094,15 -1116,24 +1094,15 @@@ static int ice_nway_reset(struct net_de { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - struct ice_port_info *pi; - enum ice_status status; + int err;
- pi = vsi->port_info; /* If VSI state is up, then restart autoneg with link up */ if (!test_bit(__ICE_DOWN, vsi->back->state)) - status = ice_aq_set_link_restart_an(pi, true, NULL); + err = ice_set_link(vsi, true); else - status = ice_aq_set_link_restart_an(pi, false, NULL); - - if (status) { - netdev_info(netdev, "link restart failed, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(pi->hw->adminq.sq_last_status)); - return -EIO; - } + err = ice_set_link(vsi, false);
- return 0; + return err; }
/** @@@ -1444,8 -1475,8 +1444,8 @@@ void ice_mask_min_supported_speeds(u64 do { \ if (req_speeds & (aq_link_speed) || \ (!req_speeds && \ - (adv_phy_type_lo & phy_type_mask_lo || \ - adv_phy_type_hi & phy_type_mask_hi))) \ + (advert_phy_type_lo & phy_type_mask_lo || \ + advert_phy_type_hi & phy_type_mask_hi))) \ ethtool_link_ksettings_add_link_mode(ks, advertising,\ ethtool_link_mode); \ } while (0) @@@ -1462,10 -1493,10 +1462,10 @@@ ice_phy_type_to_ethtool(struct net_devi struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + u64 advert_phy_type_lo = 0; + u64 advert_phy_type_hi = 0; u64 phy_type_mask_lo = 0; u64 phy_type_mask_hi = 0; - u64 adv_phy_type_lo = 0; - u64 adv_phy_type_hi = 0; u64 phy_types_high = 0; u64 phy_types_low = 0; u16 req_speeds; @@@ -1483,35 -1514,28 +1483,35 @@@ * requested by user. */ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) { - struct ice_link_default_override_tlv *ldo; - - ldo = &pf->link_dflt_override; phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo); phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
ice_mask_min_supported_speeds(phy_types_high, &phy_types_low); - - /* If override enabled and PHY mask set, then - * Advertising link mode is the intersection of the PHY - * types without media and the override PHY mask. + /* determine advertised modes based on link override only + * if it's supported and if the FW doesn't abstract the + * driver from having to account for link overrides */ - if (ldo->options & ICE_LINK_OVERRIDE_EN && - (ldo->phy_type_low || ldo->phy_type_high)) { - adv_phy_type_lo = - le64_to_cpu(pf->nvm_phy_type_lo) & - ldo->phy_type_low; - adv_phy_type_hi = - le64_to_cpu(pf->nvm_phy_type_hi) & - ldo->phy_type_high; + if (ice_fw_supports_link_override(&pf->hw) && + !ice_fw_supports_report_dflt_cfg(&pf->hw)) { + struct ice_link_default_override_tlv *ldo; + + ldo = &pf->link_dflt_override; + /* If override enabled and PHY mask set, then + * Advertising link mode is the intersection of the PHY + * types without media and the override PHY mask. + */ + if (ldo->options & ICE_LINK_OVERRIDE_EN && + (ldo->phy_type_low || ldo->phy_type_high)) { + advert_phy_type_lo = + le64_to_cpu(pf->nvm_phy_type_lo) & + ldo->phy_type_low; + advert_phy_type_hi = + le64_to_cpu(pf->nvm_phy_type_hi) & + ldo->phy_type_high; + } } } else { + /* strict mode */ phy_types_low = vsi->port_info->phy.phy_type_low; phy_types_high = vsi->port_info->phy.phy_type_high; } @@@ -1519,9 -1543,9 +1519,9 @@@ /* If Advertising link mode PHY type is not using override PHY type, * then use PHY type with media. */ - if (!adv_phy_type_lo && !adv_phy_type_hi) { - adv_phy_type_lo = vsi->port_info->phy.phy_type_low; - adv_phy_type_hi = vsi->port_info->phy.phy_type_high; + if (!advert_phy_type_lo && !advert_phy_type_hi) { + advert_phy_type_lo = vsi->port_info->phy.phy_type_low; + advert_phy_type_hi = vsi->port_info->phy.phy_type_high; }
ethtool_link_ksettings_zero_link_mode(ks, supported); @@@ -1997,7 -2021,7 +1997,7 @@@ ice_get_link_ksettings(struct net_devic return -ENOMEM;
status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) { err = -EIO; goto done; @@@ -2034,7 -2058,7 +2034,7 @@@ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, caps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EIO; goto done; @@@ -2201,14 -2225,13 +2201,14 @@@ ice_set_link_ksettings(struct net_devic const struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); - struct ethtool_link_ksettings safe_ks, copy_ks; - struct ice_aqc_get_phy_caps_data *abilities; u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT; - u16 adv_link_speed, curr_link_speed, idx; + struct ethtool_link_ksettings copy_ks = *ks; + struct ethtool_link_ksettings safe_ks = {}; + struct ice_aqc_get_phy_caps_data *phy_caps; struct ice_aqc_set_phy_cfg_data config; + u16 adv_link_speed, curr_link_speed; struct ice_pf *pf = np->vsi->back; - struct ice_port_info *p; + struct ice_port_info *pi; u8 autoneg_changed = 0; enum ice_status status; u64 phy_type_high = 0; @@@ -2216,37 -2239,46 +2216,37 @@@ int err = 0; bool linkup;
- p = np->vsi->port_info; - - if (!p) - return -EOPNOTSUPP; + pi = np->vsi->port_info;
- /* Check if this is LAN VSI */ - ice_for_each_vsi(pf, idx) - if (pf->vsi[idx]->type == ICE_VSI_PF) { - if (np->vsi != pf->vsi[idx]) - return -EOPNOTSUPP; - break; - } + if (!pi) + return -EIO;
- if (p->phy.media_type != ICE_MEDIA_BASET && - p->phy.media_type != ICE_MEDIA_FIBER && - p->phy.media_type != ICE_MEDIA_BACKPLANE && - p->phy.media_type != ICE_MEDIA_DA && - p->phy.link_info.link_info & ICE_AQ_LINK_UP) + if (pi->phy.media_type != ICE_MEDIA_BASET && + pi->phy.media_type != ICE_MEDIA_FIBER && + pi->phy.media_type != ICE_MEDIA_BACKPLANE && + pi->phy.media_type != ICE_MEDIA_DA && + pi->phy.link_info.link_info & ICE_AQ_LINK_UP) return -EOPNOTSUPP;
- abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); - if (!abilities) + phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL); + if (!phy_caps) return -ENOMEM;
/* Get the PHY capabilities based on media */ - status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP, - abilities, NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + phy_caps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + phy_caps, NULL); if (status) { - err = -EAGAIN; + err = -EIO; goto done; }
- /* copy the ksettings to copy_ks to avoid modifying the original */ - memcpy(©_ks, ks, sizeof(copy_ks)); - /* save autoneg out of ksettings */ autoneg = copy_ks.base.autoneg;
- memset(&safe_ks, 0, sizeof(safe_ks)); - /* Get link modes supported by hardware.*/ ice_phy_type_to_ethtool(netdev, &safe_ks);
@@@ -2258,7 -2290,7 +2258,7 @@@ __ETHTOOL_LINK_MODE_MASK_NBITS)) { if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EINVAL; + err = -EOPNOTSUPP; goto done; }
@@@ -2295,26 -2327,26 +2295,26 @@@ * configuration is initialized during probe from PHY capabilities * software mode, and updated on set PHY configuration. */ - memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config)); + config = pi->phy.curr_user_phy_cfg;
config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
/* Check autoneg */ - err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed, + err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed, netdev);
if (err) goto done;
/* Call to get the current link speed */ - p->phy.get_link_info = true; - status = ice_get_link_status(p, &linkup); + pi->phy.get_link_info = true; + status = ice_get_link_status(pi, &linkup); if (status) { - err = -EAGAIN; + err = -EIO; goto done; }
- curr_link_speed = p->phy.link_info.link_speed; + curr_link_speed = pi->phy.link_info.link_speed; adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
/* If speed didn't get set, set it to what it currently is. @@@ -2333,7 -2365,7 +2333,7 @@@ }
/* save the requested speeds */ - p->phy.link_info.req_speeds = adv_link_speed; + pi->phy.link_info.req_speeds = adv_link_speed;
/* set link and auto negotiation so changes take effect */ config.caps |= ICE_AQ_PHY_ENA_LINK; @@@ -2341,7 -2373,7 +2341,7 @@@ /* check if there is a PHY type for the requested advertised speed */ if (!(phy_type_low || phy_type_high)) { netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EAGAIN; + err = -EOPNOTSUPP; goto done; }
@@@ -2349,9 -2381,9 +2349,9 @@@ * for set PHY configuration */ config.phy_type_high = cpu_to_le64(phy_type_high) & - abilities->phy_type_high; + phy_caps->phy_type_high; config.phy_type_low = cpu_to_le64(phy_type_low) & - abilities->phy_type_low; + phy_caps->phy_type_low;
if (!(config.phy_type_high || config.phy_type_low)) { /* If there is no intersection and lenient mode is enabled, then @@@ -2365,13 -2397,13 +2365,13 @@@ pf->nvm_phy_type_lo; } else { netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EAGAIN; + err = -EOPNOTSUPP; goto done; } }
/* If link is up put link down */ - if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) { + if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) { /* Tell the OS link is going down, the link will go * back up when fw says it is ready asynchronously */ @@@ -2381,17 -2413,17 +2381,17 @@@ }
/* make the aq call */ - status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL); if (status) { netdev_info(netdev, "Set phy config failed,\n"); - err = -EAGAIN; + err = -EIO; goto done; }
/* Save speed request */ - p->phy.curr_user_speed_req = adv_link_speed; + pi->phy.curr_user_speed_req = adv_link_speed; done: - kfree(abilities); + kfree(phy_caps); clear_bit(__ICE_CFG_BUSY, pf->state);
return err; @@@ -2875,7 -2907,7 +2875,7 @@@ process_link /* Bring interface down, copy in the new ring info, then restore the * interface. if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ice_down(vsi);
if (tx_rings) { @@@ -2961,7 -2993,7 +2961,7 @@@ ice_get_pauseparam(struct net_device *n return;
/* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) goto out; @@@ -3028,7 -3060,7 +3028,7 @@@ ice_set_pauseparam(struct net_device *n return -ENOMEM;
/* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { kfree(pcaps); @@@ -3129,7 -3161,7 +3129,7 @@@ ice_get_rxfh(struct net_device *netdev struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int ret = 0, i; + int err, i; u8 *lut;
if (hfunc) @@@ -3148,20 -3180,17 +3148,20 @@@ if (!lut) return -ENOMEM;
- if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) { - ret = -EIO; + err = ice_get_rss_key(vsi, key); + if (err) + goto out; + + err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) goto out; - }
for (i = 0; i < vsi->rss_table_size; i++) indir[i] = (u32)(lut[i]);
out: kfree(lut); - return ret; + return err; }
/** @@@ -3182,7 -3211,7 +3182,7 @@@ ice_set_rxfh(struct net_device *netdev struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct device *dev; - u8 *seed = NULL; + int err;
dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) @@@ -3203,10 -3232,7 +3203,10 @@@ return -ENOMEM; } memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE); - seed = vsi->rss_hkey_user; + + err = ice_set_rss_key(vsi, vsi->rss_hkey_user); + if (err) + return err; }
if (!vsi->rss_lut_user) { @@@ -3227,9 -3253,8 +3227,9 @@@ vsi->rss_size); }
- if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size)) - return -EIO; + err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size); + if (err) + return err;
return 0; } @@@ -3325,9 -3350,10 +3325,9 @@@ static int ice_get_valid_rss_size(struc static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) { struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; struct ice_hw *hw; - int err = 0; + int err; u8 *lut;
dev = ice_pf_to_dev(pf); @@@ -3348,10 -3374,14 +3348,10 @@@
/* create/set RSS LUT */ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) + dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err, ice_aq_str(hw->adminq.sq_last_status)); - err = -EIO; - }
kfree(lut); return err; @@@ -3442,7 -3472,7 +3442,7 @@@ static void ice_get_wol(struct net_devi netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
/* Get WoL settings based on the HW capability */ - if (ice_is_wol_supported(pf)) { + if (ice_is_wol_supported(&pf->hw)) { wol->supported = WAKE_MAGIC; wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0; } else { @@@ -3462,7 -3492,7 +3462,7 @@@ static int ice_set_wol(struct net_devic struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back;
- if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(pf)) + if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw)) return -EOPNOTSUPP;
/* only magic packet is supported */ @@@ -3917,14 -3947,14 +3917,14 @@@ ice_get_module_eeprom(struct net_devic u8 value = 0; u8 page = 0;
- status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, - &value, 1, 0, NULL); - if (status) - return -EIO; - if (!ee || !ee->len || !data) return -EINVAL;
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0, + NULL); + if (status) + return -EIO; + if (value == ICE_MODULE_TYPE_SFP) is_sfp = true;
diff --combined drivers/net/ethernet/intel/ice/ice_lib.c index 5edc0da8b8c3,d13c7fc8fb0a..16d0ee5b48a5 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@@ -343,9 -343,6 +343,9 @@@ static int ice_vsi_clear(struct ice_vs pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL) pf->next_vsi = vsi->idx; + if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && + vsi->vf_id != ICE_INVAL_VFID) + pf->next_vsi = vsi->idx;
ice_vsi_free_arrays(vsi); mutex_unlock(&pf->sw_mutex); @@@ -422,7 -419,7 +422,7 @@@ ice_vsi_alloc(struct ice_pf *pf, enum i
vsi->type = vsi_type; vsi->back = pf; - set_bit(__ICE_DOWN, vsi->state); + set_bit(ICE_VSI_DOWN, vsi->state);
if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); @@@ -457,8 -454,8 +457,8 @@@ goto unlock_pf; }
- if (vsi->type == ICE_VSI_CTRL) { - /* Use the last VSI slot as the index for the control VSI */ + if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) { + /* Use the last VSI slot as the index for PF control VSI */ vsi->idx = pf->num_alloc_vsi - 1; pf->ctrl_vsi_idx = vsi->idx; pf->vsi[vsi->idx] = vsi; @@@ -471,9 -468,6 +471,9 @@@ pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); } + + if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID) + pf->vf[vf_id].ctrl_vsi_idx = vsi->idx; goto unlock_pf;
err_rings: @@@ -512,7 -506,7 +512,7 @@@ static int ice_alloc_fd_res(struct ice_ if (!b_val) return -EPERM;
- if (vsi->type != ICE_VSI_PF) + if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF)) return -EPERM;
if (!test_bit(ICE_FLAG_FD_ENA, pf->flags)) @@@ -523,13 -517,6 +523,13 @@@ /* each VSI gets same "best_effort" quota */ vsi->num_bfltr = b_val;
+ if (vsi->type == ICE_VSI_VF) { + vsi->num_gfltr = 0; + + /* each VSI gets same "best_effort" quota */ + vsi->num_bfltr = b_val; + } + return 0; }
@@@ -742,10 -729,11 +742,10 @@@ static void ice_set_dflt_vsi_ctx(struc */ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, tx_count = 0; + u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; + u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - u16 tx_numq_tc, rx_numq_tc; - u16 pow = 0, max_rss = 0; bool ena_tc0 = false; u8 netdev_tc = 0; int i; @@@ -763,15 -751,12 +763,15 @@@ vsi->tc_cfg.ena_tc |= 1; }
- rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc; - if (!rx_numq_tc) - rx_numq_tc = 1; - tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc; - if (!tx_numq_tc) - tx_numq_tc = 1; + num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); + if (!num_rxq_per_tc) + num_rxq_per_tc = 1; + num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc; + if (!num_txq_per_tc) + num_txq_per_tc = 1; + + /* find the (rounded up) power-of-2 of qcount */ + pow = (u16)order_base_2(num_rxq_per_tc);
/* TC mapping is a function of the number of Rx queues assigned to the * VSI for each traffic class and the offset of these queues. @@@ -784,6 -769,26 +784,6 @@@ * * Setup number and offset of Rx queues for all TCs for the VSI */ - - qcount_rx = rx_numq_tc; - - /* qcount will change if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { - if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) { - if (vsi->type == ICE_VSI_PF) - max_rss = ICE_MAX_LG_RSS_QS; - else - max_rss = ICE_MAX_RSS_QS_PER_VF; - qcount_rx = min_t(u16, rx_numq_tc, max_rss); - if (!vsi->req_rxq) - qcount_rx = min_t(u16, qcount_rx, - vsi->rss_size); - } - } - - /* find the (rounded up) power-of-2 of qcount */ - pow = (u16)order_base_2(qcount_rx); - ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { /* TC is not enabled */ @@@ -797,16 -802,16 +797,16 @@@
/* TC is enabled */ vsi->tc_cfg.tc_info[i].qoffset = offset; - vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; - vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc; + vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc; + vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc; vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & ICE_AQ_VSI_TC_Q_OFFSET_M) | ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); - offset += qcount_rx; - tx_count += tx_numq_tc; + offset += num_rxq_per_tc; + tx_count += num_txq_per_tc; ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); }
@@@ -819,7 -824,7 +819,7 @@@ if (offset) vsi->num_rxq = offset; else - vsi->num_rxq = qcount_rx; + vsi->num_rxq = num_rxq_per_tc;
vsi->num_txq = tx_count;
@@@ -851,8 -856,7 +851,8 @@@ static void ice_set_fd_vsi_ctx(struct i u8 dflt_q_group, dflt_q_prio; u16 dflt_q, report_q, val;
- if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL) + if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL && + vsi->type != ICE_VSI_VF) return;
val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID; @@@ -1175,24 -1179,7 +1175,24 @@@ static int ice_vsi_setup_vector_base(st
num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { + struct ice_vf *vf; + int i; + + ice_for_each_vf(pf, i) { + vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { + base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; + break; + } + } + if (i == pf->num_alloc_vfs) + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + ICE_RES_VF_CTRL_VEC_ID); + } else { + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + vsi->idx); + }
if (base < 0) { dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n", @@@ -1326,7 -1313,7 +1326,7 @@@ int ice_vsi_manage_rss_lut(struct ice_v vsi->rss_size); }
- err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size); + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); kfree(lut); return err; } @@@ -1337,10 -1324,12 +1337,10 @@@ */ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { - struct ice_aqc_get_set_rss_keys *key; struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; - int err = 0; - u8 *lut; + u8 *lut, *key; + int err;
dev = ice_pf_to_dev(pf); vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); @@@ -1354,26 -1343,37 +1354,26 @@@ else ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
- status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - - if (status) { - dev_err(dev, "set_rss_lut failed, error %s\n", - ice_stat_str(status)); - err = -EIO; + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) { + dev_err(dev, "set_rss_lut failed, error %d\n", err); goto ice_vsi_cfg_rss_exit; }
- key = kzalloc(sizeof(*key), GFP_KERNEL); + key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL); if (!key) { err = -ENOMEM; goto ice_vsi_cfg_rss_exit; }
if (vsi->rss_hkey_user) - memcpy(key, - (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); else - netdev_rss_key_fill((void *)key, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); - - status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); + netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
- if (status) { - dev_err(dev, "set_rss_key failed, error %s\n", - ice_stat_str(status)); - err = -EIO; - } + err = ice_set_rss_key(vsi, key); + if (err) + dev_err(dev, "set_rss_key failed, error %d\n", err);
kfree(key); ice_vsi_cfg_rss_exit: @@@ -2308,7 -2308,7 +2308,7 @@@ ice_vsi_setup(struct ice_pf *pf, struc struct ice_vsi *vsi; int ret, i;
- if (vsi_type == ICE_VSI_VF) + if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) vsi = ice_vsi_alloc(pf, vsi_type, vf_id); else vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID); @@@ -2323,7 -2323,7 +2323,7 @@@ if (vsi->type == ICE_VSI_PF) vsi->ethtype = ETH_P_PAUSE;
- if (vsi->type == ICE_VSI_VF) + if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL) vsi->vf_id = vf_id;
ice_alloc_fd_res(vsi); @@@ -2593,7 -2593,7 +2593,7 @@@ void ice_vsi_free_rx_rings(struct ice_v */ void ice_vsi_close(struct ice_vsi *vsi) { - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi);
ice_vsi_free_irq(vsi); @@@ -2610,17 -2610,17 +2610,17 @@@ int ice_ena_vsi(struct ice_vsi *vsi, bo { int err = 0;
- if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) + if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state)) return 0;
- clear_bit(__ICE_NEEDS_RESTART, vsi->state); + clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
if (vsi->netdev && vsi->type == ICE_VSI_PF) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock();
- err = ice_open(vsi->netdev); + err = ice_open_internal(vsi->netdev);
if (!locked) rtnl_unlock(); @@@ -2639,17 -2639,17 +2639,17 @@@ */ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) { - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return;
- set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
if (vsi->type == ICE_VSI_PF && vsi->netdev) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock();
- ice_stop(vsi->netdev); + ice_vsi_close(vsi);
if (!locked) rtnl_unlock(); @@@ -2770,24 -2770,7 +2770,24 @@@ int ice_vsi_release(struct ice_vsi *vsi * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type != ICE_VSI_VF) { + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { + struct ice_vf *vf; + int i; + + ice_for_each_vf(pf, i) { + vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) + break; + } + if (i == pf->num_alloc_vfs) { + /* No other VFs left that have control VSI, reclaim SW + * interrupts back to the common pool + */ + ice_free_res(pf->irq_tracker, vsi->base_vector, + ICE_RES_VF_CTRL_VEC_ID); + pf->num_avail_sw_msix += vsi->num_q_vectors; + } + } else if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; @@@ -2812,7 -2795,7 +2812,7 @@@ ice_vsi_free_q_vectors(vsi);
/* make sure unregister_netdev() was called by checking __ICE_DOWN */ - if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) { + if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) { free_netdev(vsi->netdev); vsi->netdev = NULL; } @@@ -2835,46 -2818,38 +2835,46 @@@ }
/** - * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector + * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector * @q_vector: pointer to q_vector which is being updated - * @coalesce: pointer to array of struct with stored coalesce + * @stored_intrl_setting: original INTRL setting * * Set coalesce param in q_vector and update these parameters in HW. */ static void -ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, - struct ice_coalesce_stored *coalesce) +ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector, + u16 stored_intrl_setting) { - struct ice_ring_container *rx_rc = &q_vector->rx; - struct ice_ring_container *tx_rc = &q_vector->tx; struct ice_hw *hw = &q_vector->vsi->back->hw;
- tx_rc->itr_setting = coalesce->itr_tx; - rx_rc->itr_setting = coalesce->itr_rx; - - /* dynamic ITR values will be updated during Tx/Rx */ - if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) - wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(tx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) - wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - - q_vector->intrl = coalesce->intrl; + q_vector->intrl = stored_intrl_setting; wr32(hw, GLINT_RATE(q_vector->reg_idx), ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); }
+/** + * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector + * @q_vector: pointer to q_vector which is being updated + * @rc: pointer to ring container + * @stored_itr_setting: original ITR setting + * + * Set coalesce param in q_vector and update these parameters in HW. + */ +static void +ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + u16 stored_itr_setting) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + rc->itr_setting = stored_itr_setting; + + /* dynamic ITR values will be updated during Tx/Rx */ + if (!ITR_IS_DYNAMIC(rc->itr_setting)) + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S); +} + /** * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors * @vsi: VSI connected with q_vectors @@@ -2894,11 -2869,6 +2894,11 @@@ ice_vsi_rebuild_get_coalesce(struct ice coalesce[i].itr_tx = q_vector->tx.itr_setting; coalesce[i].itr_rx = q_vector->rx.itr_setting; coalesce[i].intrl = q_vector->intrl; + + if (i < vsi->num_txq) + coalesce[i].tx_valid = true; + if (i < vsi->num_rxq) + coalesce[i].rx_valid = true; }
return vsi->num_q_vectors; @@@ -2923,59 -2893,17 +2923,59 @@@ ice_vsi_rebuild_set_coalesce(struct ice if ((size && !coalesce) || !vsi) return;
- for (i = 0; i < size && i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[i]); - - /* number of q_vectors increased, so assume coalesce settings were - * changed globally (i.e. ethtool -C eth0 instead of per-queue) and use - * the previous settings from q_vector 0 for all of the new q_vectors + /* There are a couple of cases that have to be handled here: + * 1. The case where the number of queue vectors stays the same, but + * the number of Tx or Rx rings changes (the first for loop) + * 2. The case where the number of queue vectors increased (the + * second for loop) + */ + for (i = 0; i < size && i < vsi->num_q_vectors; i++) { + /* There are 2 cases to handle here and they are the same for + * both Tx and Rx: + * if the entry was valid previously (coalesce[i].[tr]x_valid + * and the loop variable is less than the number of rings + * allocated, then write the previous values + * + * if the entry was not valid previously, but the number of + * rings is less than are allocated (this means the number of + * rings increased from previously), then write out the + * values in the first element + */ + if (i < vsi->alloc_rxq && coalesce[i].rx_valid) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[i].itr_rx); + else if (i < vsi->alloc_rxq) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[0].itr_rx); + + if (i < vsi->alloc_txq && coalesce[i].tx_valid) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[i].itr_tx); + else if (i < vsi->alloc_txq) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[0].itr_tx); + + ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], + coalesce[i].intrl); + } + + /* the number of queue vectors increased so write whatever is in + * the first element */ - for (; i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[0]); + for (; i < vsi->num_q_vectors; i++) { + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[0].itr_tx); + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[0].itr_rx); + ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], + coalesce[0].intrl); + } }
/** @@@ -3004,11 -2932,9 +3004,11 @@@ int ice_vsi_rebuild(struct ice_vsi *vsi
coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (coalesce) - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, - coalesce); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi);
@@@ -3152,7 -3078,6 +3152,6 @@@ err_vsi bool ice_is_reset_in_progress(unsigned long *state) { return test_bit(__ICE_RESET_OICR_RECV, state) || - test_bit(__ICE_DCBNL_DEVRESET, state) || test_bit(__ICE_PFR_REQ, state) || test_bit(__ICE_CORER_REQ, state) || test_bit(__ICE_GLOBR_REQ, state); @@@ -3423,40 -3348,3 +3422,40 @@@ int ice_clear_dflt_vsi(struct ice_sw *s
return 0; } + +/** + * ice_set_link - turn on/off physical link + * @vsi: VSI to modify physical link on + * @ena: turn on/off physical link + */ +int ice_set_link(struct ice_vsi *vsi, bool ena) +{ + struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_port_info *pi = vsi->port_info; + struct ice_hw *hw = pi->hw; + enum ice_status status; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + status = ice_aq_set_link_restart_an(pi, ena, NULL); + + /* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE. + * this is not a fatal error, so print a warning message and return + * a success code. Return an error if FW returns an error code other + * than ICE_AQ_RC_EMODE + */ + if (status == ICE_ERR_AQ_ERROR) { + if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE) + dev_warn(dev, "can't set link to %s, err %s aq_err %s. not fatal, continuing\n", + (ena ? "ON" : "OFF"), ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + } else if (status) { + dev_err(dev, "can't set link to %s, err %s aq_err %s\n", + (ena ? "ON" : "OFF"), ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; + } + + return 0; +} diff --combined drivers/net/ethernet/intel/ice/ice_main.c index 30935aaa8935,d821c687f239..1b2f1e258e5c --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@@ -84,7 -84,7 +84,7 @@@ static void ice_check_for_hang_subtask( break; }
- if (!vsi || test_bit(__ICE_DOWN, vsi->state)) + if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state)) return;
if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) @@@ -140,10 -140,21 +140,10 @@@ static int ice_init_mac_fltr(struct ice
perm_addr = vsi->port_info->mac.perm_addr; status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI); - if (!status) - return 0; - - /* We aren't useful with no MAC filters, so unregister if we - * had an error - */ - if (vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n", - ice_stat_str(status)); - unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } + if (status) + return -EIO;
- return -EIO; + return 0; }
/** @@@ -198,9 -209,9 +198,9 @@@ static int ice_add_mac_to_unsync_list(s */ static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) { - return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); }
/** @@@ -267,9 -278,9 +267,9 @@@ static int ice_vsi_sync_fltr(struct ice INIT_LIST_HEAD(&vsi->tmp_unsync_list);
if (ice_vsi_fltr_changed(vsi)) { - clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
/* grab the netdev's addr_list_lock */ netif_addr_lock_bh(netdev); @@@ -350,8 -361,8 +350,8 @@@ }
if (((changed_flags & IFF_PROMISC) || promisc_forced_on) || - test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) { - clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) { + clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { @@@ -384,12 -395,12 +384,12 @@@ goto exit;
out_promisc: - set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); goto exit; out: /* if something went wrong then set the changed flag so we try again */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); exit: clear_bit(__ICE_CFG_BUSY, vsi->state); return err; @@@ -436,6 -447,7 +436,6 @@@ static void ice_pf_dis_all_vsi(struct i
for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++) pf->vf_agg_node[node].num_vsis = 0; - }
/** @@@ -597,7 -609,7 +597,7 @@@ static void ice_print_topo_conflict(str case ICE_AQ_LINK_TOPO_UNREACH_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA: - netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); + netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n"); break; case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); @@@ -719,7 -731,7 +719,7 @@@ void ice_print_link_msg(struct ice_vsi }
status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) netdev_info(vsi->netdev, "Get phy capability failed.\n");
@@@ -752,7 -764,7 +752,7 @@@ static void ice_vsi_link_event(struct i if (!vsi) return;
- if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev) return;
if (vsi->type == ICE_VSI_PF) { @@@ -872,10 -884,10 +872,10 @@@ ice_link_event(struct ice_pf *pf, struc { struct device *dev = ice_pf_to_dev(pf); struct ice_phy_info *phy_info; + enum ice_status status; struct ice_vsi *vsi; u16 old_link_speed; bool old_link; - int result;
phy_info = &pi->phy; phy_info->link_info_old = phy_info->link_info; @@@ -886,11 -898,10 +886,11 @@@ /* update the link info structures and re-enable link events, * don't bail on failure due to other book keeping needed */ - result = ice_update_link_info(pi); - if (result) - dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n", - pi->lport); + status = ice_update_link_info(pi); + if (status) + dev_dbg(dev, "Failed to update link status on port %d, err %s aq_err %s\n", + pi->lport, ice_stat_str(status), + ice_aq_str(pi->hw->adminq.sq_last_status));
/* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. @@@ -906,12 -917,18 +906,12 @@@ if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) && !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) { set_bit(ICE_FLAG_NO_MEDIA, pf->flags); - - result = ice_aq_set_link_restart_an(pi, false, NULL); - if (result) { - dev_dbg(dev, "Failed to set link down, VSI %d error %d\n", - vsi->vsi_num, result); - return result; - } + ice_set_link(vsi, false); }
/* if the old link up/down and speed is the same as the new */ if (link_up == old_link && link_speed == old_link_speed) - return result; + return 0;
if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@@ -925,7 -942,7 +925,7 @@@
ice_vc_notify_link_state(pf);
- return result; + return 0; }
/** @@@ -1027,7 -1044,7 +1027,7 @@@ struct ice_aq_task };
/** - * ice_wait_for_aq_event - Wait for an AdminQ event from firmware + * ice_aq_wait_for_event - Wait for an AdminQ event from firmware * @pf: pointer to the PF private structure * @opcode: the opcode to wait for * @timeout: how long to wait, in jiffies @@@ -1625,7 -1642,7 +1625,7 @@@ static int ice_force_phys_link_state(st if (!pcaps) return -ENOMEM;
- retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (retcode) { dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", @@@ -1685,7 -1702,7 +1685,7 @@@ static int ice_init_nvm_phy_type(struc if (!pcaps) return -ENOMEM;
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps, NULL);
if (status) { @@@ -1731,7 -1748,7 +1731,7 @@@ static void ice_init_link_dflt_override * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings * @pi: port info structure * - * If default override is enabled, initialized the user PHY cfg speed and FEC + * If default override is enabled, initialize the user PHY cfg speed and FEC * settings using the default override mask from the NVM. * * The PHY should only be configured with the default override settings the @@@ -1740,9 -1757,6 +1740,9 @@@ * and the PHY has not been configured with the default override settings. The * state is set here, and cleared in ice_configure_phy the first time the PHY is * configured. + * + * This function should be called only if the FW doesn't support default + * configuration mode, as reported by ice_fw_supports_report_dflt_cfg. */ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) { @@@ -1790,21 -1804,22 +1790,21 @@@ static int ice_init_phy_user_cfg(struc struct ice_phy_info *phy = &pi->phy; struct ice_pf *pf = pi->hw->back; enum ice_status status; - struct ice_vsi *vsi; int err = 0;
if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) return -EIO;
- vsi = ice_get_main_vsi(pf); - if (!vsi) - return -EINVAL; - pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return -ENOMEM;
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + pcaps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + pcaps, NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n"); err = -EIO; @@@ -1814,24 -1829,22 +1814,24 @@@ ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
/* check if lenient mode is supported and enabled */ - if (ice_fw_supports_link_override(&vsi->back->hw) && + if (ice_fw_supports_link_override(pi->hw) && !(pcaps->module_compliance_enforcement & ICE_AQC_MOD_ENFORCE_STRICT_MODE)) { set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
- /* if link default override is enabled, initialize user PHY - * configuration with link default override values + /* if the FW supports default PHY configuration mode, then the driver + * does not have to apply link override settings. If not, + * initialize user PHY configuration with link override values */ - if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) { + if (!ice_fw_supports_report_dflt_cfg(pi->hw) && + (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) { ice_init_phy_cfg_dflt_override(pi); goto out; } }
- /* if link default override is not enabled, initialize PHY using - * topology with media + /* if link default override is not enabled, set user flow control and + * FEC settings based on what get_phy_caps returned */ phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps, pcaps->link_fec_options); @@@ -1856,24 -1869,27 +1856,24 @@@ err_out static int ice_configure_phy(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_port_info *pi = vsi->port_info; struct ice_aqc_get_phy_caps_data *pcaps; struct ice_aqc_set_phy_cfg_data *cfg; - struct ice_port_info *pi; + struct ice_phy_info *phy = &pi->phy; + struct ice_pf *pf = vsi->back; enum ice_status status; int err = 0;
- pi = vsi->port_info; - if (!pi) - return -EINVAL; - /* Ensure we have media as we cannot configure a medialess port */ - if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) + if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) return -EPERM;
ice_print_topo_conflict(vsi);
- if (vsi->port_info->phy.link_info.topo_media_conflict == - ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) + if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) return -EPERM;
- if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) + if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) return ice_force_phys_link_state(vsi, true);
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); @@@ -1881,7 -1897,7 +1881,7 @@@ return -ENOMEM;
/* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n", @@@ -1894,19 -1910,15 +1894,19 @@@ * there's nothing to do */ if (pcaps->caps & ICE_AQC_PHY_EN_LINK && - ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg)) + ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg)) goto done;
/* Use PHY topology as baseline for configuration */ memset(pcaps, 0, sizeof(*pcaps)); - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + pcaps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + pcaps, NULL); if (status) { - dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n", + dev_err(dev, "Failed to get PHY caps, VSI %d error %s\n", vsi->vsi_num, ice_stat_str(status)); err = -EIO; goto done; @@@ -1925,8 -1937,8 +1925,8 @@@ */ if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, vsi->back->state)) { - cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low; - cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high; + cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low; + cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high; } else { u64 phy_low = 0, phy_high = 0;
@@@ -1944,7 -1956,7 +1944,7 @@@ }
/* FEC */ - ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req); + ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
/* Can't provide what was requested; use PHY capabilities */ if (cfg->link_fec_opt != @@@ -1956,12 -1968,12 +1956,12 @@@ /* Flow Control - always supported; no need to check against * capabilities */ - ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req); + ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
/* Enable link and link update */ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
- status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL); if (status) { dev_err(dev, "Failed to set phy config, VSI %d error %s\n", vsi->vsi_num, ice_stat_str(status)); @@@ -2008,7 -2020,7 +2008,7 @@@ static void ice_check_media_subtask(str /* PHY settings are reset on media insertion, reconfigure * PHY to preserve settings. */ - if (test_bit(__ICE_DOWN, vsi->state) && + if (test_bit(ICE_VSI_DOWN, vsi->state) && test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) return;
@@@ -2059,7 -2071,6 +2059,7 @@@ static void ice_service_task(struct wor ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); ice_sync_arfs_fltrs(pf); + ice_flush_fdir_ctx(pf); /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ ice_service_task_complete(pf);
@@@ -2071,7 -2082,6 +2071,7 @@@ test_bit(__ICE_MDD_EVENT_PENDING, pf->state) || test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) || + test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) || test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@@ -2210,13 -2220,8 +2210,13 @@@ static int ice_vsi_req_irq_msix(struct /* skip this unused q_vector */ continue; } - err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0, - q_vector->name, q_vector); + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) + err = devm_request_irq(dev, irq_num, vsi->irq_handler, + IRQF_SHARED, q_vector->name, + q_vector); + else + err = devm_request_irq(dev, irq_num, vsi->irq_handler, + 0, q_vector->name, q_vector); if (err) { netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", err); @@@ -2519,7 -2524,7 +2519,7 @@@ ice_xdp_setup_prog(struct ice_vsi *vsi }
/* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); @@@ -2970,11 -2975,18 +2970,11 @@@ static int ice_cfg_netdev(struct ice_vs struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - int err; - - err = ice_devlink_create_port(vsi); - if (err) - return err;
netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, vsi->alloc_rxq); - if (!netdev) { - err = -ENOMEM; - goto err_destroy_devlink_port; - } + if (!netdev) + return -ENOMEM;
vsi->netdev = netdev; np = netdev_priv(netdev); @@@ -3002,7 -3014,25 +3002,7 @@@ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU;
- err = register_netdev(vsi->netdev); - if (err) - goto err_free_netdev; - - devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); - - netif_carrier_off(vsi->netdev); - - /* make sure transmit queues start off as stopped */ - netif_tx_stop_all_queues(vsi->netdev); - return 0; - -err_free_netdev: - free_netdev(vsi->netdev); - vsi->netdev = NULL; -err_destroy_devlink_port: - ice_devlink_destroy_port(vsi); - return err; }
/** @@@ -3077,6 -3107,15 +3077,6 @@@ ice_vlan_rx_add_vid(struct net_device * struct ice_vsi *vsi = np->vsi; int ret;
- if (vid >= VLAN_N_VID) { - netdev_err(netdev, "VLAN id requested %d is out of range %d\n", - vid, VLAN_N_VID); - return -EINVAL; - } - - if (vsi->info.pvid) - return -EINVAL; - /* VLAN 0 is added by default during load/reset */ if (!vid) return 0; @@@ -3093,7 -3132,7 +3093,7 @@@ */ ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); if (!ret) - set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
return ret; } @@@ -3114,6 -3153,9 +3114,6 @@@ ice_vlan_rx_kill_vid(struct net_device struct ice_vsi *vsi = np->vsi; int ret;
- if (vsi->info.pvid) - return -EINVAL; - /* don't allow removal of VLAN 0 */ if (!vid) return 0; @@@ -3129,7 -3171,7 +3129,7 @@@ if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) ret = ice_cfg_vlan_pruning(vsi, false, false);
- set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; }
@@@ -3188,6 -3230,8 +3188,6 @@@ unroll_napi_add if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { - if (vsi->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } @@@ -3493,15 -3537,14 +3493,14 @@@ static int ice_init_interrupt_scheme(st }
/** - * ice_is_wol_supported - get NVM state of WoL - * @pf: board private structure + * ice_is_wol_supported - check if WoL is supported + * @hw: pointer to hardware info * * Check if WoL is supported based on the HW configuration. * Returns true if NVM supports and enables WoL for this port, false otherwise */ - bool ice_is_wol_supported(struct ice_pf *pf) + bool ice_is_wol_supported(struct ice_hw *hw) { - struct ice_hw *hw = &pf->hw; u16 wol_ctrl;
/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control @@@ -3510,7 -3553,7 +3509,7 @@@ if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl)) return false;
- return !(BIT(hw->pf_id) & wol_ctrl); + return !(BIT(hw->port_info->lport) & wol_ctrl); }
/** @@@ -3941,40 -3984,6 +3940,40 @@@ static void ice_print_wake_reason(struc dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str); }
+/** + * ice_register_netdev - register netdev and devlink port + * @pf: pointer to the PF struct + */ +static int ice_register_netdev(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + int err = 0; + + vsi = ice_get_main_vsi(pf); + if (!vsi || !vsi->netdev) + return -EIO; + + err = register_netdev(vsi->netdev); + if (err) + goto err_register_netdev; + + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + err = ice_devlink_create_port(vsi); + if (err) + goto err_devlink_create; + + devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); + + return 0; +err_devlink_create: + unregister_netdev(vsi->netdev); +err_register_netdev: + free_netdev(vsi->netdev); + vsi->netdev = NULL; + return err; +} + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@@ -4182,28 -4191,25 +4181,25 @@@ ice_probe(struct pci_dev *pdev, const s goto err_send_version_unroll; }
+ /* not a fatal error if this fails */ err = ice_init_nvm_phy_type(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err); - goto err_send_version_unroll; - }
+ /* not a fatal error if this fails */ err = ice_update_link_info(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_update_link_info failed: %d\n", err); - goto err_send_version_unroll; - }
ice_init_link_dflt_override(pf->hw.port_info);
/* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + /* not a fatal error if this fails */ err = ice_init_phy_user_cfg(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err); - goto err_send_version_unroll; - }
if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) { struct ice_vsi *vsi = ice_get_main_vsi(pf); @@@ -4255,16 -4261,10 +4251,16 @@@ pcie_print_link_status(pf->pdev);
probe_done: + err = ice_register_netdev(pf); + if (err) + goto err_netdev_reg; + /* ready to go, so clear down state bit */ clear_bit(__ICE_DOWN, pf->state); + return 0;
+err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: @@@ -4310,7 -4310,7 +4306,7 @@@ static void ice_set_wake(struct ice_pf }
/** - * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet + * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet * @pf: pointer to the PF struct * * Issue firmware command to enable multicast magic wake, making @@@ -4564,6 -4564,7 +4560,7 @@@ static int __maybe_unused ice_suspend(s continue; ice_vsi_free_q_vectors(pf->vsi[v]); } + ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf);
pci_save_state(pdev); @@@ -4960,8 -4961,8 +4957,8 @@@ static void ice_set_rx_mode(struct net_ * ndo_set_rx_mode may be triggered even without a change in netdev * flags */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
/* schedule our worker thread which will take care of @@@ -5234,7 -5235,7 +5231,7 @@@ static int ice_up_complete(struct ice_v if (err) return err;
- clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_napi_enable_all(vsi); ice_vsi_ena_irq(vsi);
@@@ -5341,6 -5342,7 +5338,6 @@@ static void ice_update_vsi_ring_stats(s vsi->tx_linearize = 0; vsi->rx_buf_failed = 0; vsi->rx_page_failed = 0; - vsi->rx_gro_dropped = 0;
rcu_read_lock();
@@@ -5355,6 -5357,7 +5352,6 @@@ vsi_stats->rx_bytes += bytes; vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; - vsi->rx_gro_dropped += ring->rx_stats.gro_dropped; }
/* update XDP Tx rings counters */ @@@ -5375,7 -5378,7 +5372,7 @@@ void ice_update_vsi_stats(struct ice_vs struct ice_eth_stats *cur_es = &vsi->eth_stats; struct ice_pf *pf = vsi->back;
- if (test_bit(__ICE_DOWN, vsi->state) || + if (test_bit(ICE_VSI_DOWN, vsi->state) || test_bit(__ICE_CFG_BUSY, pf->state)) return;
@@@ -5386,7 -5389,7 +5383,7 @@@ ice_update_eth_stats(vsi);
cur_ns->tx_errors = cur_es->tx_errors; - cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped; + cur_ns->rx_dropped = cur_es->rx_discards; cur_ns->tx_dropped = cur_es->tx_discards; cur_ns->multicast = cur_es->rx_multicast;
@@@ -5580,7 -5583,7 +5577,7 @@@ void ice_get_stats64(struct net_device * But, only call the update routine and read the registers if VSI is * not down. */ - if (!test_bit(__ICE_DOWN, vsi->state)) + if (!test_bit(ICE_VSI_DOWN, vsi->state)) ice_update_vsi_ring_stats(vsi); stats->tx_packets = vsi_stats->tx_packets; stats->tx_bytes = vsi_stats->tx_bytes; @@@ -5780,7 -5783,7 +5777,7 @@@ int ice_vsi_open_ctrl(struct ice_vsi *v if (err) goto err_up_complete;
- clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_vsi_ena_irq(vsi);
return 0; @@@ -6167,7 -6170,7 +6164,7 @@@ static int ice_change_mtu(struct net_de netdev->mtu = (unsigned int)new_mtu;
/* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { int err;
err = ice_down(vsi); @@@ -6302,118 -6305,89 +6299,118 @@@ const char *ice_stat_str(enum ice_statu }
/** - * ice_set_rss - Set RSS keys and lut + * ice_set_rss_lut - Set RSS LUT * @vsi: Pointer to VSI structure - * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure */ -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev;
- dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL;
- status = ice_aq_set_rss_key(hw, vsi->idx, buf); + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut;
- if (status) { - dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + status = ice_aq_set_rss_lut(hw, ¶ms); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
- if (lut) { - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; +} + +/** + * ice_set_rss_key - Set RSS key + * @vsi: Pointer to the VSI structure + * @seed: RSS hash seed + * + * Returns 0 on success, negative on failure + */ +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
return 0; }
/** - * ice_get_rss - Get RSS keys and lut + * ice_get_rss_lut - Get RSS LUT * @vsi: Pointer to VSI structure - * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev;
- dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL;
- status = ice_aq_get_rss_key(hw, vsi->idx, buf); - if (status) { - dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut; + + status = ice_aq_get_rss_lut(hw, ¶ms); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
- if (lut) { - status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; +} + +/** + * ice_get_rss_key - Get RSS key + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the key in + * + * Returns 0 on success, negative on failure + */ +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; }
return 0; @@@ -6636,7 -6610,7 +6633,7 @@@ static void ice_tx_timeout(struct net_d default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); set_bit(__ICE_DOWN, pf->state); - set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); set_bit(__ICE_SERVICE_DIS, pf->state); break; } @@@ -6658,12 -6632,33 +6655,34 @@@ * Returns 0 on success, negative value on failure */ int ice_open(struct net_device *netdev) + { + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't open net device while reset is in progress"); + return -EBUSY; + } + + return ice_open_internal(netdev); + } + + /** + * ice_open_internal - Called when a network interface becomes active + * @netdev: network interface device structure + * + * Internal ice_open implementation. Should not be used directly except for ice_open and reset + * handling routine + * + * Returns 0 on success, negative value on failure + */ + int ice_open_internal(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_port_info *pi; + enum ice_status status; int err;
if (test_bit(__ICE_NEEDS_RESTART, pf->state)) { @@@ -6671,14 -6666,19 +6690,14 @@@ return -EIO; }
- if (test_bit(__ICE_DOWN, pf->state)) { - netdev_err(netdev, "device is not ready yet\n"); - return -EBUSY; - } - netif_carrier_off(netdev);
pi = vsi->port_info; - err = ice_update_link_info(pi); - if (err) { - netdev_err(netdev, "Failed to get link info, error %d\n", - err); - return err; + status = ice_update_link_info(pi); + if (status) { + netdev_err(netdev, "Failed to get link info, error %s\n", + ice_stat_str(status)); + return -EIO; }
/* Set PHY if there is media, otherwise, turn off PHY */ @@@ -6701,7 -6701,12 +6720,7 @@@ } } else { set_bit(ICE_FLAG_NO_MEDIA, pf->flags); - err = ice_aq_set_link_restart_an(pi, false, NULL); - if (err) { - netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n", - vsi->vsi_num, err); - return err; - } + ice_set_link(vsi, false); }
err = ice_vsi_open(vsi); @@@ -6729,6 -6734,12 +6748,12 @@@ int ice_stop(struct net_device *netdev { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't stop net device while reset is in progress"); + return -EBUSY; + }
ice_vsi_close(vsi);
diff --combined drivers/net/ethernet/intel/ice/ice_switch.c index 5e5683a3eb23,834cbd3f7b31..357d3073d814 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@@ -920,7 -920,7 +920,7 @@@ ice_create_vsi_list_map(struct ice_hw * struct ice_vsi_list_map_info *v_map; int i;
- v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL); + v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL); if (!v_map) return NULL;
@@@ -1238,6 -1238,9 +1238,9 @@@ ice_add_update_vsi_list(struct ice_hw * ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id);
+ if (!m_entry->vsi_list_info) + return ICE_ERR_NO_MEMORY; + /* If this entry was large action then the large action needs * to be updated to point to FWD to VSI list */ @@@ -2220,6 -2223,7 +2223,7 @@@ ice_vsi_uses_fltr(struct ice_fltr_mgmt_ return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI && fm_entry->fltr_info.vsi_handle == vsi_handle) || (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST && + fm_entry->vsi_list_info && (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map)))); }
@@@ -2292,14 -2296,12 +2296,12 @@@ ice_add_to_vsi_fltr_list(struct ice_hw return ICE_ERR_PARAM;
list_for_each_entry(fm_entry, lkup_list_head, list_entry) { - struct ice_fltr_info *fi; - - fi = &fm_entry->fltr_info; - if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle)) + if (!ice_vsi_uses_fltr(fm_entry, vsi_handle)) continue;
status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, - vsi_list_head, fi); + vsi_list_head, + &fm_entry->fltr_info); if (status) return status; } @@@ -2622,7 -2624,7 +2624,7 @@@ ice_remove_vsi_lkup_fltr(struct ice_hw &remove_list_head); mutex_unlock(rule_lock); if (status) - return; + goto free_fltr_list;
switch (lkup) { case ICE_SW_LKUP_MAC: @@@ -2645,6 -2647,7 +2647,7 @@@ break; }
+ free_fltr_list: list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { list_del(&fm_entry->list_entry); devm_kfree(ice_hw_to_dev(hw), fm_entry); diff --combined drivers/net/ethernet/intel/ice/ice_type.h index 2efc91b58c9e,266036b7a49a..7ead1c13f16f --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@@ -192,24 -192,6 +192,24 @@@ enum ice_fltr_ptype ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_SCTP, ICE_FLTR_PTYPE_NONF_IPV4_OTHER, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER, + ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER, + ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3, + ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3, + ICE_FLTR_PTYPE_NONF_IPV4_ESP, + ICE_FLTR_PTYPE_NONF_IPV6_ESP, + ICE_FLTR_PTYPE_NONF_IPV4_AH, + ICE_FLTR_PTYPE_NONF_IPV6_AH, + ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP, + ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP, + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE, + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION, + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE, + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION, + ICE_FLTR_PTYPE_NON_IP_L2, ICE_FLTR_PTYPE_FRAG_IPV4, ICE_FLTR_PTYPE_NONF_IPV6_UDP, ICE_FLTR_PTYPE_NONF_IPV6_TCP, @@@ -553,6 -535,7 +553,7 @@@ struct ice_dcb_app_priority_table #define ICE_TLV_STATUS_ERR 0x4 #define ICE_APP_PROT_ID_FCOE 0x8906 #define ICE_APP_PROT_ID_ISCSI 0x0cbc + #define ICE_APP_PROT_ID_ISCSI_860 0x035c #define ICE_APP_PROT_ID_FIP 0x8914 #define ICE_APP_SEL_ETHTYPE 0x1 #define ICE_APP_SEL_TCPIP 0x2 @@@ -720,13 -703,13 +721,13 @@@ struct ice_hw
enum ice_aq_err pkg_dwnld_status;
- /* Driver's package ver - (from the Metadata seg) */ + /* Driver's package ver - (from the Ice Metadata section) */ struct ice_pkg_ver pkg_ver; u8 pkg_name[ICE_PKG_NAME_SIZE];
- /* Driver's Ice package version (from the Ice seg) */ - struct ice_pkg_ver ice_pkg_ver; - u8 ice_pkg_name[ICE_PKG_NAME_SIZE]; + /* Driver's Ice segment format version and ID (from the Ice seg) */ + struct ice_pkg_ver ice_seg_fmt_ver; + u8 ice_seg_id[ICE_SEG_ID_SIZE];
/* Pointer to the ice segment */ struct ice_seg *seg; @@@ -827,14 -810,6 +828,14 @@@ struct ice_hw_port_stats u64 fd_sb_match; };
+struct ice_aq_get_set_rss_lut_params { + u16 vsi_handle; /* software VSI handle */ + u16 lut_size; /* size of the LUT buffer */ + u8 lut_type; /* type of the LUT (i.e. VSI, PF, Global) */ + u8 *lut; /* input RSS LUT for set and output RSS LUT for get */ + u8 global_lut_id; /* only valid when lut_type is global */ +}; + /* Checksum and Shadow RAM pointers */ #define ICE_SR_NVM_CTRL_WORD 0x00 #define ICE_SR_BOOT_CFG_PTR 0x132 @@@ -941,9 -916,4 +942,9 @@@ #define ICE_FW_API_LLDP_FLTR_MIN 7 #define ICE_FW_API_LLDP_FLTR_PATCH 1
+/* AQ API version for report default configuration */ +#define ICE_FW_API_REPORT_DFLT_CFG_MAJ 1 +#define ICE_FW_API_REPORT_DFLT_CFG_MIN 7 +#define ICE_FW_API_REPORT_DFLT_CFG_PATCH 3 + #endif /* _ICE_TYPE_H_ */ diff --combined drivers/net/ethernet/mellanox/mlx5/core/dev.c index 4def64d0e669,9153c9bda96f..a9166cd85013 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@@ -58,6 -58,9 +58,6 @@@ static bool is_eth_supported(struct mlx if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false;
- if (is_eth_rep_supported(dev)) - return false; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return false;
@@@ -188,12 -191,12 +188,12 @@@ static bool is_ib_supported(struct mlx5 }
enum { - MLX5_INTERFACE_PROTOCOL_ETH_REP, MLX5_INTERFACE_PROTOCOL_ETH, + MLX5_INTERFACE_PROTOCOL_ETH_REP,
+ MLX5_INTERFACE_PROTOCOL_IB, MLX5_INTERFACE_PROTOCOL_IB_REP, MLX5_INTERFACE_PROTOCOL_MPIB, - MLX5_INTERFACE_PROTOCOL_IB,
MLX5_INTERFACE_PROTOCOL_VNET, }; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index b425b4a539bf,bc6f77ea0a31..e1c51eabe8fe --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -269,7 -269,6 +269,7 @@@ struct mlx5e_params struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; + bool ptp_rx; };
enum { @@@ -517,6 -516,7 +517,7 @@@ struct mlx5e_icosq struct mlx5_wq_cyc wq; void __iomem *uar_map; u32 sqn; + u16 reserved_room; unsigned long state;
/* control path */ @@@ -708,11 -708,11 +709,11 @@@ struct mlx5e_channel int cpu; };
-struct mlx5e_port_ptp; +struct mlx5e_ptp;
struct mlx5e_channels { struct mlx5e_channel **c; - struct mlx5e_port_ptp *port_ptp; + struct mlx5e_ptp *ptp; unsigned int num; struct mlx5e_params params; }; @@@ -727,11 -727,10 +728,11 @@@ struct mlx5e_channel_stats struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp;
-struct mlx5e_port_ptp_stats { +struct mlx5e_ptp_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; } ____cacheline_aligned_in_smp;
enum { @@@ -838,7 -837,6 +839,7 @@@ struct mlx5e_priv struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir ptp_tir; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS];
@@@ -858,11 -856,10 +859,11 @@@ struct mlx5e_stats stats; struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_channel_stats trap_stats; - struct mlx5e_port_ptp_stats port_ptp_stats; + struct mlx5e_ptp_stats ptp_stats; u16 max_nch; u8 max_opened_tc; - bool port_ptp_opened; + bool tx_ptp_opened; + bool rx_ptp_opened; struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; @@@ -885,6 -882,7 +886,6 @@@ #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; - struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; @@@ -918,12 -916,13 +919,12 @@@ struct mlx5e_profile const struct mlx5e_rx_handlers *rx_handlers; int max_tc; u8 rq_groups; + bool rx_ptp_support; };
void mlx5e_build_ptys2ethtool_map(void);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@@ -966,9 -965,9 +967,9 @@@ struct mlx5e_tirc_config mlx5e_tirc_get struct mlx5e_xsk_param;
struct mlx5e_rq_param; -int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq); +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq); int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); @@@ -1023,11 -1022,18 +1024,11 @@@ int mlx5e_num_channels_changed(struct m int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels);
-void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); - -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@@ -1086,10 -1092,10 +1087,10 @@@ int mlx5e_create_indirect_rqt(struct ml int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); @@@ -1170,9 -1176,10 +1171,9 @@@ void mlx5e_detach_netdev(struct mlx5e_p void mlx5e_destroy_netdev(struct mlx5e_priv *priv); int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, const struct mlx5e_profile *new_profile, void *new_ppriv); +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); -void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 1c44000ad675,68e54cc1cd16..5da5e5323a44 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@@ -29,8 -29,6 +29,8 @@@ #define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_NAT_BIT BIT(3) #define MLX5_CT_STATE_REPLY_BIT BIT(4) +#define MLX5_CT_STATE_RELATED_BIT BIT(5) +#define MLX5_CT_STATE_INVALID_BIT BIT(6)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@@ -187,6 -185,28 +187,28 @@@ mlx5_tc_ct_entry_has_nat(struct mlx5_ct return !!(entry->tuple_nat_node.next); }
+ static int + mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, + u32 *labels, u32 *id) + { + if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { + *id = 0; + return 0; + } + + if (mapping_add(ct_priv->labels_mapping, labels, id)) + return -EOPNOTSUPP; + + return 0; + } + + static void + mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) + { + if (id) + mapping_remove(ct_priv->labels_mapping, id); + } + static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { @@@ -438,7 -458,7 +460,7 @@@ mlx5_tc_ct_entry_del_rule(struct mlx5_t mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); kfree(attr); }
@@@ -641,8 -661,8 +663,8 @@@ mlx5_tc_ct_entry_create_mod_hdr(struct if (!meta) return -EOPNOTSUPP;
- err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, - &attr->ct_attr.ct_labels_id); + err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, + &attr->ct_attr.ct_labels_id); if (err) return -EOPNOTSUPP; if (nat) { @@@ -679,7 -699,7 +701,7 @@@
err_mapping: dealloc_mod_hdr_actions(&mod_acts); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); return err; }
@@@ -697,7 -717,7 +719,7 @@@ mlx5_tc_ct_entry_add_rule(struct mlx5_t
zone_rule->nat = nat;
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM;
@@@ -739,7 -759,7 +761,7 @@@
zone_rule->attr = attr;
- kfree(spec); + kvfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0; @@@ -747,11 -767,11 +769,11 @@@ err_rule: mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: kfree(attr); err_attr: - kfree(spec); + kvfree(spec); return err; }
@@@ -1199,7 -1219,7 +1221,7 @@@ void mlx5_tc_ct_match_del(struct mlx5_t if (!priv || !ct_attr->ct_labels_id) return;
- mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); + mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); }
int @@@ -1209,8 -1229,8 +1231,8 @@@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_ struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack) { + bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; struct flow_rule *rule = flow_cls_offload_flow_rule(f); - bool trk, est, untrk, unest, new, rpl, unrpl; struct flow_dissector_key_ct *mask, *key; u32 ctstate = 0, ctstate_mask = 0; u16 ct_state_on, ct_state_off; @@@ -1238,9 -1258,7 +1260,9 @@@ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | TCA_FLOWER_KEY_CT_FLAGS_NEW | - TCA_FLOWER_KEY_CT_FLAGS_REPLY)) { + TCA_FLOWER_KEY_CT_FLAGS_REPLY | + TCA_FLOWER_KEY_CT_FLAGS_RELATED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { NL_SET_ERR_MSG_MOD(extack, "only ct_state trk, est, new and rpl are supported for offload"); return -EOPNOTSUPP; @@@ -1252,13 -1270,9 +1274,13 @@@ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; @@@ -1266,20 -1280,6 +1288,20 @@@ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; + ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0; + + if (rel) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +rel isn't supported"); + return -EOPNOTSUPP; + } + + if (inv) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +inv isn't supported"); + return -EOPNOTSUPP; + }
if (new) { NL_SET_ERR_MSG_MOD(extack, @@@ -1302,7 -1302,7 +1324,7 @@@ ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; - if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) + if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) return -EOPNOTSUPP; mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, MLX5_CT_LABELS_MASK); @@@ -1562,14 -1562,6 +1584,14 @@@ mlx5_tc_ct_free_pre_ct_tables(struct ml mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); }
+/* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ +static struct lock_class_key ct_entries_ht_lock_key; + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@@ -1604,8 -1596,6 +1626,8 @@@ if (err) goto err_init;
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, zone_params); if (err) @@@ -1707,10 -1697,10 +1729,10 @@@ __mlx5_tc_ct_flow_offload(struct mlx5_t struct mlx5_ct_ft *ft; u32 fte_id = 1;
- post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); if (!post_ct_spec || !ct_flow) { - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@@ -1820,10 -1810,6 +1842,10 @@@ ct_flow->post_ct_attr->prio = 0; ct_flow->post_ct_attr->ft = ct_priv->post_ct;
+ /* Splits were handled before CT */ + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + ct_flow->post_ct_attr->esw_attr->split_count = 0; + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); @@@ -1849,7 -1835,7 +1871,7 @@@
attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kfree(post_ct_spec); + kvfree(post_ct_spec);
return rule;
@@@ -1870,7 -1856,7 +1892,7 @@@ err_alloc_pre err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 89d5ca91566e,e1271998b937..9350ca05ce65 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@@ -21,6 -21,11 +21,11 @@@ enum MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, };
+ struct mlx5e_encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; + }; + struct mlx5e_tc_tunnel { int tunnel_type; enum mlx5_flow_match_level match_level; @@@ -44,6 -49,8 +49,8 @@@ struct flow_cls_offload *f, void *headers_c, void *headers_v); + bool (*encap_info_equal)(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); };
extern struct mlx5e_tc_tunnel vxlan_tunnel; @@@ -76,12 -83,10 +83,12 @@@ int mlx5e_tc_tun_update_header_ipv6(str static inline int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } -int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } +static inline int +mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } #endif int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, @@@ -103,6 -108,9 +110,9 @@@ int mlx5e_tc_tun_parse_udp_ports(struc void *headers_c, void *headers_v);
+ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + #endif /* CONFIG_MLX5_ESWITCH */
#endif //__MLX5_EN_TC_TUNNEL_H__ diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 01d435e15ad3,9f16ad2c0710..593503bc4d07 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@@ -2,7 -2,6 +2,7 @@@ /* Copyright (c) 2021 Mellanox Technologies. */
#include <net/fib_notifier.h> +#include <net/nexthop.h> #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@@ -477,16 -476,11 +477,11 @@@ void mlx5e_detach_decap(struct mlx5e_pr mlx5e_decap_dealloc(priv, d); }
- struct encap_key { - const struct ip_tunnel_key *ip_tun_key; - struct mlx5e_tc_tunnel *tc_tunnel; - }; - - static int cmp_encap_info(struct encap_key *a, - struct encap_key *b) + bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) { - return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && + a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; }
static int cmp_decap_info(struct mlx5e_decap_key *a, @@@ -495,7 -489,7 +490,7 @@@ return memcmp(&a->key, &b->key, sizeof(b->key)); }
- static int hash_encap_info(struct encap_key *key) + static int hash_encap_info(struct mlx5e_encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); @@@ -517,18 -511,18 +512,18 @@@ static bool mlx5e_decap_take(struct mlx }
static struct mlx5e_encap_entry * - mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, uintptr_t hash_key) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_key e_key; struct mlx5e_encap_entry *e; - struct encap_key e_key;
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { e_key.ip_tun_key = &e->tun_info->key; e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, key) && + if (e->tunnel->encap_info_equal(&e_key, key) && mlx5e_encap_take(e)) return e; } @@@ -695,8 -689,8 +690,8 @@@ int mlx5e_attach_encap(struct mlx5e_pri struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; unsigned long tbl_time_before = 0; - struct encap_key key; struct mlx5e_encap_entry *e; + struct mlx5e_encap_key key; bool entry_created = false; unsigned short family; uintptr_t hash_key; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index f7c880edae37,19d22a63313f..8c0f78c09215 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@@ -46,7 -46,8 +46,8 @@@ struct mlx5e_ktls_offload_context_rx struct tls12_crypto_info_aes_gcm_128 crypto_info; struct accel_rule rule; struct sock *sk; - struct mlx5e_rq_stats *stats; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; u32 tirn; u32 key_id; @@@ -84,7 -85,7 +85,7 @@@ static int mlx5e_ktls_create_tir(struc
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); MLX5_SET(tirc, tirc, indirect_table, rqtn); @@@ -137,11 -138,10 +138,10 @@@ post_static_params(struct mlx5e_icosq * { struct mlx5e_set_tls_static_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs;
num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC);
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@@ -168,11 -168,10 +168,10 @@@ post_progress_params(struct mlx5e_icos { struct mlx5e_set_tls_progress_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs;
num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC);
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@@ -218,7 -217,7 +217,7 @@@ unlock return err;
err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; err = PTR_ERR(cseg); complete(&priv_rx->add_ctx); goto unlock; @@@ -277,17 -276,15 +276,15 @@@ resync_post_get_progress_params(struct
buf->priv_rx = priv_rx;
- BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); - spin_lock_bh(&sq->channel->async_icosq_lock);
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) { spin_unlock_bh(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; }
- pi = mlx5e_icosq_get_next_pi(sq, 1); + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS); wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi);
#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)) @@@ -307,7 -304,7 +304,7 @@@
wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS, - .num_wqebbs = 1, + .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS, .tls_get_params.buf = buf, }; icosq_fill_wi(sq, pi, &wi); @@@ -322,7 -319,7 +319,7 @@@ err_dma_unmap err_free: kfree(buf); err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; return err; }
@@@ -378,13 -375,13 +375,13 @@@ static int resync_handle_seq_match(stru
cseg = post_static_params(sq, priv_rx); if (IS_ERR(cseg)) { - priv_rx->stats->tls_resync_res_skip++; + priv_rx->rq_stats->tls_resync_res_skip++; err = PTR_ERR(cseg); goto unlock; } /* Do not increment priv_rx refcnt, CQE handling is empty */ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); - priv_rx->stats->tls_resync_res_ok++; + priv_rx->rq_stats->tls_resync_res_ok++; unlock: spin_unlock_bh(&c->async_icosq_lock);
@@@ -420,13 -417,13 +417,13 @@@ void mlx5e_ktls_handle_get_psv_completi auth_state = MLX5_GET(tls_progress_params, ctx, auth_state); if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; goto out; }
hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); - priv_rx->stats->tls_resync_req_end++; + priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@@ -609,7 -606,8 +606,8 @@@ int mlx5e_ktls_add_rx(struct net_devic priv_rx->rxq = rxq; priv_rx->sk = sk;
- priv_rx->stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
rqtn = priv->direct_tir[rxq].rqt.rqtn; @@@ -630,7 -628,7 +628,7 @@@ if (err) goto err_post_wqes;
- priv_rx->stats->tls_ctx++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx);
return 0;
@@@ -666,7 -664,7 +664,7 @@@ void mlx5e_ktls_del_rx(struct net_devic if (cancel_work_sync(&resync->work)) mlx5e_ktls_priv_rx_put(priv_rx);
- priv_rx->stats->tls_del++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_del); if (priv_rx->rule.rule) mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 964558086ad6,53802e18af90..b185a0452629 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -34,7 -34,6 +34,7 @@@ #include "en/port.h" #include "en/params.h" #include "en/xsk/pool.h" +#include "en/ptp.h" #include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@@ -369,7 -368,7 +369,7 @@@ int mlx5e_ethtool_set_ringparam(struct new_channels.params.log_rq_mtu_frames = log_rq_size; new_channels.params.log_sq_size = log_sq_size;
- err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_channels.params); if (err) goto unlock;
@@@ -759,11 -758,11 +759,11 @@@ static int get_fec_supported_advertised return 0; }
- static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, - u32 eth_proto_cap, - u8 connector_type, bool ext) + static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, + struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, u8 connector_type) { - if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) @@@ -899,9 -898,9 +899,9 @@@ static int ptys2connector_type[MLX5E_CO [MLX5E_PORT_OTHER] = PORT_OTHER, };
- static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext) + static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type) { - if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) return ptys2connector_type[connector_type];
if (eth_proto & @@@ -1002,11 -1001,11 +1002,11 @@@ int mlx5e_ethtool_get_link_ksettings(st data_rate_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - - link_ksettings->base.port = get_connector_port(eth_proto_oper, - connector_type, ext); - ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, - connector_type, ext); + connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? + connector_type : MLX5E_PORT_UNKNOWN; + link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type); + ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin, + connector_type); get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
if (an_status == MLX5_AN_COMPLETE) @@@ -1866,19 -1865,13 +1866,19 @@@ int mlx5e_modify_rx_cqe_compression_loc
new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) + new_channels.params.ptp_rx = new_val;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; return 0; }
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + else + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, + &new_channels.params.ptp_rx); if (err) return err;
@@@ -1899,6 -1892,11 +1899,6 @@@ static int set_pflag_rx_cqe_compress(st if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP;
- if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { - netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); - return -EINVAL; - } - err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); if (err) return err; @@@ -2034,7 -2032,7 +2034,7 @@@ static int set_pflag_tx_port_ts(struct mlx5e_num_channels_changed_ctx, NULL); out: if (!err) - priv->port_ptp_opened = true; + priv->tx_ptp_opened = true;
return err; } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 773449c1424b,5db63b9f3b70..2f47608bb9b9 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -87,6 -87,51 +87,6 @@@ bool mlx5e_check_fragmented_striding_rq return true; }
-void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - params->log_rq_mtu_frames = is_kdump_kernel() ? - MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - - mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : - BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); -} - -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return false; - - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; - - if (params->xdp_prog) { - /* XSK params are not considered here. If striding RQ is in use, - * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will - * be called with the known XSK params. - */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return false; - } - - return true; -} - -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_CYCLIC; -} - void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@@ -214,17 -259,18 +214,17 @@@ static inline void mlx5e_build_umr_wqe( ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); }
-static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, - struct mlx5e_channel *c) +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, sizeof(*rq->mpwqe.info)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM;
- mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe); + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
return 0; } @@@ -256,7 -302,7 +256,7 @@@ static int mlx5e_create_umr_mkey(struc MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); @@@ -373,53 -419,58 +373,53 @@@ static void mlx5e_free_mpwqe_rq_drop_pa __free_page(rq->wqe_overflow.page); }
-static int mlx5e_alloc_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, +static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, NULL); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq_param *rqp, - struct mlx5e_rq *rq) + int node, struct mlx5e_rq *rq) { struct page_pool_params pp_params = { 0 }; - struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); - u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; int i;
- rqp->wq.db_numa_node = cpu_to_node(c->cpu); - - rq->wq_type = params->rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->priv = c->priv; - rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; - rq->icosq = &c->icosq; - rq->ix = c->ix; - rq->mdev = mdev; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->xdpsq = &c->rq_xdpsq; - rq->xsk_pool = xsk_pool; - rq->ptp_cyc2time = mlx5_is_real_time_rq(mdev) ? - mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; - - if (rq->xsk_pool) - rq->stats = &c->priv->channel_stats[c->ix].xskrq; - else - rq->stats = &c->priv->channel_stats[c->ix].rq; + rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
- rq_xdp_ix = rq->ix; - if (xsk) - rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); - if (err < 0) - goto err_rq_xdp_prog; - rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; @@@ -429,7 -480,7 +429,7 @@@ err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog;
err = mlx5e_alloc_mpwqe_rq_drop_page(rq); if (err) @@@ -453,7 -504,7 +453,7 @@@ goto err_rq_drop_page; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
- err = mlx5e_rq_alloc_mpwqe_info(rq, c); + err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) goto err_rq_mkey; break; @@@ -461,7 -512,7 +461,7 @@@ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
@@@ -473,19 -524,23 +473,19 @@@ rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->wqe.frags) { err = -ENOMEM; goto err_rq_wq_destroy; }
- err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu)); + err = mlx5e_init_di_list(rq, wq_sz, node); if (err) goto err_rq_frags;
- rq->mkey_be = c->mkey_be; + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key); }
- err = mlx5e_rq_set_handlers(rq, params, xsk); - if (err) - goto err_free_by_rq_type; - if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); @@@ -495,8 -550,8 +495,8 @@@ pp_params.order = 0; pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; + pp_params.nid = node; + pp_params.dev = rq->pdev; pp_params.dma_dir = rq->buff.map_dir;
/* page_pool can be used even when there is no rq->xdp_prog, @@@ -580,6 -635,8 +580,6 @@@ err_rq_frags } err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); -err_rq_xdp: - xdp_rxq_info_unreg(&rq->xdp_rxq); err_rq_xdp_prog: if (params->xdp_prog) bpf_prog_put(params->xdp_prog); @@@ -592,12 -649,10 +592,12 @@@ static void mlx5e_free_rq(struct mlx5e_ struct bpf_prog *old_prog; int i;
- old_prog = rcu_dereference_protected(rq->xdp_prog, - lockdep_is_held(&rq->priv->state_lock)); - if (old_prog) - bpf_prog_put(old_prog); + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + }
switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@@ -833,14 -888,13 +833,14 @@@ void mlx5e_free_rx_descs(struct mlx5e_r
}
-int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq) { + struct mlx5_core_dev *mdev = rq->mdev; int err;
- err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq); + err = mlx5e_alloc_rq(params, xsk, param, node, rq); if (err) return err;
@@@ -852,28 -906,28 +852,28 @@@ if (err) goto err_destroy_rq;
- if (mlx5e_is_tls_on(c->priv) && !mlx5_accel_is_ktls_device(c->mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &c->rq.state); /* must be FPGA */ + if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev)) + __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
- if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full)) - __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state); + if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
/* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render * skb->checksum incorrect. */ - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) - __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
/* For CQE compression on striding RQ, use stride index provided by * HW if capability is supported. */ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && - MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) - __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
return 0;
@@@ -888,10 -942,7 +888,10 @@@ err_free_rq void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_trigger_irq(rq->icosq); + if (rq->icosq) + mlx5e_trigger_irq(rq->icosq); + else + napi_schedule(rq->cq.napi); }
void mlx5e_deactivate_rq(struct mlx5e_rq *rq) @@@ -903,8 -954,7 +903,8 @@@ void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - cancel_work_sync(&rq->icosq->recover_work); + if (rq->icosq) + cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@@ -969,7 -1019,7 +969,7 @@@ static int mlx5e_alloc_xdpsq(struct mlx sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->xsk_pool = xsk_pool; @@@ -1040,7 -1090,8 +1040,8 @@@ static int mlx5e_alloc_icosq(struct mlx int err;
sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->reserved_room = param->stop_room;
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@@ -1124,7 -1175,7 +1125,7 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); @@@ -1137,7 -1188,9 +1138,7 @@@ if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); sq->stop_room = param->stop_room; - sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? - mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@@ -1205,7 -1258,7 +1206,7 @@@ static int mlx5e_create_sq(struct mlx5_ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@@ -1808,16 -1861,14 +1809,16 @@@ static int mlx5e_set_tx_maxrate(struct return err; }
-void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params) { - *ccp = (struct mlx5e_create_cq_param) { - .napi = &c->napi, - .ch_stats = c->stats, - .node = cpu_to_node(c->cpu), - .ix = c->ix, - }; + int err; + + err = mlx5e_init_rxq_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); }
static int mlx5e_open_queues(struct mlx5e_channel *c, @@@ -1880,7 -1931,7 +1881,7 @@@ goto err_close_sqs; }
- err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); if (err) goto err_close_xdp_sq;
@@@ -1982,7 -2033,7 +1983,7 @@@ static int mlx5e_open_channel(struct ml c->cpu = cpu; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; @@@ -2061,6 -2112,314 +2062,6 @@@ static void mlx5e_close_channel(struct kvfree(c); }
-#define DEFAULT_FRAG_SIZE (2048) - -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) -{ - u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); - int frag_size_max = DEFAULT_FRAG_SIZE; - u32 buf_size = 0; - int i; - - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - - if (mlx5e_rx_is_linear_skb(params, xsk)) { - int frag_stride; - - frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); - frag_stride = roundup_pow_of_two(frag_stride); - - info->arr[0].frag_size = byte_count; - info->arr[0].frag_stride = frag_stride; - info->num_frags = 1; - info->wqe_bulk = PAGE_SIZE / frag_stride; - goto out; - } - - if (byte_count > PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) - frag_size_max = PAGE_SIZE; - - i = 0; - while (buf_size < byte_count) { - int frag_size = byte_count - buf_size; - - if (i < MLX5E_MAX_RX_FRAGS - 1) - frag_size = min(frag_size, frag_size_max); - - info->arr[i].frag_size = frag_size; - info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - - buf_size += frag_size; - i++; - } - info->num_frags = i; - /* number of different wqes sharing a page */ - info->wqe_bulk = 1 + (info->num_frags % 2); - -out: - info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); - info->log_num_frags = order_base_2(info->num_frags); -} - -static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) -{ - int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; - - switch (wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - sz += sizeof(struct mlx5e_rx_wqe_ll); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - sz += sizeof(struct mlx5e_rx_wqe_cyc); - } - - return order_base_2(sz); -} - -static u8 mlx5e_get_rq_log_wq_sz(void *rqc) -{ - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - return MLX5_GET(wq, wq, log_wq_sz); -} - -void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - int ndsegs = 1; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); - MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); - ndsegs = param->frags_info.num_frags; - } - - MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); - MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); - MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); - MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); - MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - mlx5e_build_rx_cq_param(priv, params, xsk, ¶m->cqp); -} - -static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, - struct mlx5e_rq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); - MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); -} - -void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); -} - -void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - bool allow_swp; - - allow_swp = mlx5_geneve_tx_allowed(priv->mdev) || - !!MLX5_IPSEC_DEV(priv->mdev); - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - MLX5_SET(sqc, sqc, allow_swp, allow_swp); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); - param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); -} - -static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); - if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) - MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); -} - -void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_cq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - bool hw_stridx = false; - void *cqc = param->cqc; - u8 log_cq_size; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); - hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - log_cq_size = params->log_rq_mtu_frames; - } - - MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? - MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); - MLX5_SET(cqc, cqc, cqe_comp_en, 1); - } - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; -} - -void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; -} - -void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); - - mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - -void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - - MLX5_SET(wq, wq, log_wq_sz, log_wq_size); - MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - mlx5e_build_ico_cq_param(priv, log_wq_size, ¶m->cqp); -} - -static void mlx5e_build_async_icosq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - u8 log_wq_size, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - - /* async_icosq is used by XSK only if xdp_prog is active */ - if (params->xdp_prog) - param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ - MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - MLX5_SET(wq, wq, log_wq_sz, log_wq_size); - mlx5e_build_ico_cq_param(priv, log_wq_size, ¶m->cqp); -} - -void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); -} - -static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, - struct mlx5e_rq_param *rqp) -{ - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, - order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc)); - default: /* MLX5_WQ_TYPE_CYCLIC */ - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - } -} - -static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev) -{ - if (netdev->hw_features & NETIF_F_HW_TLS_RX) - return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; -} - -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam) -{ - u8 icosq_log_wq_sz, async_icosq_log_wq_sz; - - mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); - - icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); - async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev); - - mlx5e_build_sq_param(priv, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_async_icosq_param(priv, params, async_icosq_log_wq_sz, &cparam->async_icosq); -} - int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { @@@ -2075,7 -2434,7 +2076,7 @@@ if (!chs->c || !cparam) goto err_free;
- mlx5e_build_channel_param(priv, &chs->params, cparam); + mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL;
@@@ -2087,8 -2446,9 +2088,8 @@@ goto err_close_channels; }
- if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) { - err = mlx5e_port_ptp_open(priv, &chs->params, chs->c[0]->lag_port, - &chs->port_ptp); + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); if (err) goto err_close_channels; } @@@ -2102,8 -2462,8 +2103,8 @@@ return 0;
err_close_ptp: - if (chs->port_ptp) - mlx5e_port_ptp_close(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_close(chs->ptp);
err_close_channels: for (i--; i >= 0; i--) @@@ -2123,8 -2483,8 +2124,8 @@@ static void mlx5e_activate_channels(str for (i = 0; i < chs->num; i++) mlx5e_activate_channel(chs->c[i]);
- if (chs->port_ptp) - mlx5e_ptp_activate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_activate_channel(chs->ptp); }
#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ @@@ -2151,8 -2511,8 +2152,8 @@@ static void mlx5e_deactivate_channels(s { int i;
- if (chs->port_ptp) - mlx5e_ptp_deactivate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_deactivate_channel(chs->ptp);
for (i = 0; i < chs->num; i++) mlx5e_deactivate_channel(chs->c[i]); @@@ -2162,10 -2522,11 +2163,10 @@@ void mlx5e_close_channels(struct mlx5e_ { int i;
- if (chs->port_ptp) { - mlx5e_port_ptp_close(chs->port_ptp); - chs->port_ptp = NULL; + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; } - for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]);
@@@ -2221,12 -2582,12 +2222,12 @@@ int mlx5e_create_indirect_rqt(struct ml return err; }
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int err; int ix;
- for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); if (unlikely(err)) goto err_destroy_rqts; @@@ -2242,11 -2603,11 +2243,11 @@@ err_destroy_rqts return err; }
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i;
- for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_rqt(priv, &tirs[i].rqt); }
@@@ -2329,8 -2690,7 +2330,8 @@@ static u32 mlx5e_get_direct_rqn(struct }
static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, - struct mlx5e_redirect_rqt_param rrp) + struct mlx5e_redirect_rqt_param rrp, + struct mlx5e_redirect_rqt_param *ptp_rrp) { u32 rqtn; int ix; @@@ -2356,17 -2716,11 +2357,17 @@@ rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } + if (ptp_rrp) { + rqtn = priv->ptp_tir.rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp); + } }
static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { + bool rx_ptp_support = priv->profile->rx_ptp_support; + struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL; struct mlx5e_redirect_rqt_param rrp = { .is_rss = true, { @@@ -2376,22 -2730,12 +2377,22 @@@ } }, }; + struct mlx5e_redirect_rqt_param ptp_rrp; + + if (rx_ptp_support) { + u32 ptp_rqn;
- mlx5e_redirect_rqts(priv, rrp); + ptp_rrp.is_rss = false; + ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ? + priv->drop_rq.rqn : ptp_rqn; + ptp_rrp_p = &ptp_rrp; + } + mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p); }
static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) { + bool rx_ptp_support = priv->profile->rx_ptp_support; struct mlx5e_redirect_rqt_param drop_rrp = { .is_rss = false, { @@@ -2399,7 -2743,7 +2400,7 @@@ }, };
- mlx5e_redirect_rqts(priv, drop_rrp); + mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL); }
static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { @@@ -2688,8 -3032,6 +2689,8 @@@ static int mlx5e_update_netdev_queues(s nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; + if (priv->channels.params.ptp_rx) + num_rxqs++;
mlx5e_netdev_set_tcs(netdev, nch, ntc);
@@@ -2775,14 -3117,11 +2776,14 @@@ static void mlx5e_build_txq_maps(struc } }
- if (!priv->channels.port_ptp) + if (!priv->channels.ptp) + return; + + if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) return;
for (tc = 0; tc < num_tc; tc++) { - struct mlx5e_port_ptp *c = priv->channels.port_ptp; + struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq; @@@ -3056,7 -3395,7 +3057,7 @@@ int mlx5e_open_drop_rq(struct mlx5e_pri struct mlx5e_cq *cq = &drop_rq->cq; int err;
- mlx5e_build_drop_rq_param(priv, &rq_param); + mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); if (err) @@@ -3104,10 -3443,10 +3105,10 @@@ int mlx5e_create_tis(struct mlx5_core_d { void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
if (MLX5_GET(tisc, tisc, tls_en)) - MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); @@@ -3177,7 -3516,7 +3178,7 @@@ static void mlx5e_cleanup_nic_tx(struc static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, @@@ -3269,7 -3608,7 +3270,7 @@@ err_destroy_inner_tirs return err; }
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { struct mlx5e_tir *tir; void *tirc; @@@ -3283,7 -3622,7 +3284,7 @@@ if (!in) return -ENOMEM;
- for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { memset(in, 0, inlen); tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); @@@ -3321,11 -3660,11 +3322,11 @@@ void mlx5e_destroy_indirect_tirs(struc mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); }
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i;
- for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_tir(priv->mdev, &tirs[i]); }
@@@ -3452,16 -3791,8 +3453,16 @@@ static int mlx5e_setup_tc(struct net_de void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; int err;
+ if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + switch (type) { case TC_SETUP_BLOCK: { struct flow_block_offload *f = type_data; @@@ -3506,22 -3837,15 +3507,22 @@@ void mlx5e_fold_sw_stats64(struct mlx5e s->tx_dropped += sq_stats->dropped; } } - if (priv->port_ptp_opened) { + if (priv->tx_ptp_opened) { for (i = 0; i < priv->max_opened_tc; i++) { - struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i]; + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; s->tx_dropped += sq_stats->dropped; } } + if (priv->rx_ptp_opened) { + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->multicast += rq_stats->mcast_packets; + } }
void @@@ -3530,9 -3854,6 +3531,9 @@@ mlx5e_get_stats(struct net_device *dev struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ if (!netif_device_present(dev)) + return; + /* In switchdev mode, monitor counters doesn't monitor * rx/tx stats of 802_3. The update stats mechanism * should keep the 802_3 layout counters updated @@@ -3574,19 -3895,11 +3575,19 @@@ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; }
+static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) +{ + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); +} + static void mlx5e_set_rx_mode(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); }
static int mlx5e_set_mac(struct net_device *netdev, void *addr) @@@ -3601,7 -3914,7 +3602,7 @@@ ether_addr_copy(netdev->dev_addr, saddr->sa_data); netif_addr_unlock_bh(netdev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
return 0; } @@@ -3823,8 -4136,7 +3824,8 @@@ static netdev_features_t mlx5e_fix_feat
mutex_lock(&priv->state_lock); params = &priv->channels.params; - if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + if (!priv->fs.vlan || + !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) { /* HW strips the outer C-tag header, this is a problem * for S-tag traffic. */ @@@ -3907,7 -4219,7 +3908,7 @@@ int mlx5e_change_mtu(struct net_device
new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_channels.params); if (err) goto out;
@@@ -3971,18 -4283,9 +3972,18 @@@ static int mlx5e_change_nic_mtu(struct return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); }
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) +{ + bool set = *(bool *)ctx; + + return mlx5e_ptp_rx_manage_fs(priv, set); +} + int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { + struct mlx5e_channels new_channels = {}; struct hwtstamp_config config; + bool rx_cqe_compress_def; int err;
if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || @@@ -4002,13 -4305,11 +4003,13 @@@ }
mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + new_channels.params.ptp_rx = false; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@@ -4025,7 -4326,15 +4026,7 @@@ case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) - netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } + new_channels.params.ptp_rx = rx_cqe_compress_def; config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: @@@ -4033,20 -4342,6 +4034,20 @@@ return -ERANGE; }
+ if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + goto out; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, + &new_channels.params.ptp_rx); + if (err) { + mutex_unlock(&priv->state_lock); + return err; + } +out: memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock);
@@@ -4157,9 -4452,6 +4158,9 @@@ static int mlx5e_set_vf_link_state(stru struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev;
+ if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, mlx5_ifla_link2vport(link_state)); } @@@ -4171,9 -4463,6 +4172,9 @@@ int mlx5e_get_vf_config(struct net_devi struct mlx5_core_dev *mdev = priv->mdev; int err;
+ if (!netif_device_present(dev)) + return -EOPNOTSUPP; + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); if (err) return err; @@@ -4190,32 -4479,6 +4191,32 @@@ int mlx5e_get_vf_stats(struct net_devic return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } + +static bool +mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if (!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); +} + +static int +mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); +} #endif
static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) @@@ -4574,8 -4837,6 +4575,8 @@@ const struct net_device_ops mlx5e_netde .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif .ndo_get_devlink_port = mlx5e_get_devlink_port, }; @@@ -4589,6 -4850,93 +4590,6 @@@ void mlx5e_build_default_indir_rqt(u32 indirection_rqt[i] = i % num_channels; }
-static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) -{ - u32 link_speed = 0; - u32 pci_bw = 0; - - mlx5e_port_max_linkspeed(mdev, &link_speed); - pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); - mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - -#define MLX5E_SLOW_PCI_RATIO (2) - - return link_speed && pci_bw && - link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; -} - -static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) -{ - return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - DIM_CQ_PERIOD_MODE_START_FROM_CQE : - DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->tx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); - } else { - params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); - } -} - -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->rx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); - } else { - params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); - } -} - -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_tx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, - params->tx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -} - -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_rx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -} - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@@ -4601,6 -4949,25 +4602,6 @@@ return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); }
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - /* Prefer Striding RQ, unless any of the following holds: - * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. - * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. - * - * No XSK params: checking the availability of striding RQ in general. - */ - if (!slow_pci_heuristic(mdev) && - mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || - !mlx5e_rx_is_linear_skb(params, NULL))) - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); - mlx5e_set_rq_type(mdev, params); - mlx5e_init_rq_type_params(mdev, params); -} - void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels) { @@@ -4931,6 -5298,10 +4932,6 @@@ static int mlx5e_nic_init(struct mlx5_c if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
- err = mlx5e_devlink_port_register(priv); - if (err) - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - mlx5e_health_create_reporters(priv);
return 0; @@@ -4939,6 -5310,7 +4940,6 @@@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_devlink_port_unregister(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } @@@ -4946,7 -5318,6 +4947,7 @@@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err;
mlx5e_create_q_counters(priv); @@@ -4961,7 -5332,7 +4962,7 @@@ if (err) goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts;
@@@ -4969,30 -5340,22 +4970,30 @@@ if (err) goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs;
- err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_direct_tirs;
- err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_xsk_rqts;
+ err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_xsk_tirs; + + err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_ptp_rqt; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_xsk_tirs; + goto err_destroy_ptp_direct_tir; }
err = mlx5e_tc_nic_init(priv); @@@ -5013,20 -5376,16 +5014,20 @@@ err_tc_nic_cleanup mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_ptp_direct_tir: + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); +err_destroy_ptp_rqt: + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); err_destroy_xsk_tirs: - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); err_destroy_xsk_rqts: - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@@ -5038,18 -5397,14 +5039,18 @@@ err_destroy_q_counters
static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@@ -5095,7 -5450,7 +5096,7 @@@ static void mlx5e_nic_enable(struct mlx return; mlx5e_dcbnl_init_app(priv);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
rtnl_lock(); if (netif_running(netdev)) @@@ -5118,7 -5473,7 +5119,7 @@@ static void mlx5e_nic_disable(struct ml netif_device_detach(priv->netdev); rtnl_unlock();
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) @@@ -5157,7 -5512,6 +5158,7 @@@ static const struct mlx5e_profile mlx5e .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, + .rx_ptp_support = true, };
/* mlx5e generic netdev management API (move to en_common.c) */ @@@ -5392,11 -5746,6 +5393,11 @@@ rollback return err; }
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) +{ + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); +} + void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; @@@ -5479,17 -5828,10 +5480,17 @@@ static int mlx5e_probe(struct auxiliary
priv->profile = profile; priv->ppriv = NULL; + + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_destroy_netdev; + } + err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_destroy_netdev; + goto err_devlink_cleanup; }
err = mlx5e_resume(adev); @@@ -5507,15 -5849,12 +5508,15 @@@ mlx5e_devlink_port_type_eth_set(priv);
mlx5e_dcbnl_init_app(priv); + mlx5_uplink_netdev_set(mdev, netdev); return 0;
err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); +err_devlink_cleanup: + mlx5e_devlink_port_unregister(priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); return err; @@@ -5530,7 -5869,6 +5531,7 @@@ static void mlx5e_remove(struct auxilia unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); + mlx5e_devlink_port_unregister(priv); mlx5e_destroy_netdev(priv); }
@@@ -5556,18 -5894,18 +5557,18 @@@ int mlx5e_init(void
mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); - ret = mlx5e_rep_init(); + ret = auxiliary_driver_register(&mlx5e_driver); if (ret) return ret;
- ret = auxiliary_driver_register(&mlx5e_driver); + ret = mlx5e_rep_init(); if (ret) - mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); return ret; }
void mlx5e_cleanup(void) { - auxiliary_driver_unregister(&mlx5e_driver); mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 58a2c1abb195,8d39bfee84a9..e58ef8c713e4 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@@ -40,12 -40,10 +40,12 @@@ #include "eswitch.h" #include "en.h" #include "en_rep.h" +#include "en/params.h" #include "en/txrx.h" #include "en_tc.h" #include "en/rep/tc.h" #include "en/rep/neigh.h" +#include "en/devlink.h" #include "fs_core.h" #include "lib/mlx5.h" #define CREATE_TRACE_POINTS @@@ -71,6 -69,16 +71,6 @@@ static void mlx5e_rep_get_drvinfo(struc fw_rev_sub(mdev), mdev->board_id); }
-static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - mlx5e_rep_get_drvinfo(dev, drvinfo); - strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev), - sizeof(drvinfo->bus_info)); -} - static const struct counter_desc sw_rep_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, @@@ -277,6 -285,46 +277,6 @@@ static u32 mlx5e_rep_get_rxfh_indir_siz return mlx5e_ethtool_get_rxfh_indir_size(priv); }
-static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev, - struct ethtool_pause_stats *stats) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - mlx5e_stats_pause_get(priv, stats); -} - -static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - mlx5e_ethtool_get_pauseparam(priv, pauseparam); -} - -static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_set_pauseparam(priv, pauseparam); -} - -static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *link_ksettings) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); -} - -static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *link_ksettings) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); -} - static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@@ -296,6 -344,34 +296,6 @@@ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, };
-static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USE_ADAPTIVE, - .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_strings = mlx5e_rep_get_strings, - .get_sset_count = mlx5e_rep_get_sset_count, - .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, - .get_ringparam = mlx5e_rep_get_ringparam, - .set_ringparam = mlx5e_rep_set_ringparam, - .get_channels = mlx5e_rep_get_channels, - .set_channels = mlx5e_rep_set_channels, - .get_coalesce = mlx5e_rep_get_coalesce, - .set_coalesce = mlx5e_rep_set_coalesce, - .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings, - .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, - .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, - .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, - .get_rxfh = mlx5e_get_rxfh, - .set_rxfh = mlx5e_set_rxfh, - .get_rxnfc = mlx5e_get_rxnfc, - .set_rxnfc = mlx5e_set_rxnfc, - .get_pause_stats = mlx5e_uplink_rep_get_pause_stats, - .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, - .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, -}; - static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { @@@ -335,7 -411,8 +335,7 @@@ static int mlx5e_sqs2vport_start(struc }
/* Add re-inject rule to the PF/representor sqs */ - flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - rep->vport, + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@@ -445,7 -522,7 +445,7 @@@ bool mlx5e_is_uplink_rep(struct mlx5e_p return (rep->vport == MLX5_VPORT_UPLINK); }
-static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@@ -465,8 -542,8 +465,8 @@@ mlx5e_get_sw_stats64(const struct net_d return 0; }
-static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@@ -491,6 -568,34 +491,6 @@@ static int mlx5e_rep_change_mtu(struct return mlx5e_change_mtu(netdev, new_mtu, NULL); }
-static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) -{ - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); -} - -static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) -{ - struct sockaddr *saddr = addr; - - if (!is_valid_ether_addr(saddr->sa_data)) - return -EADDRNOTAVAIL; - - ether_addr_copy(netdev->dev_addr, saddr->sa_data); - return 0; -} - -static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, - __be16 vlan_proto) -{ - netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); - - if (vlan != 0) - return -EOPNOTSUPP; - - /* allow setting 0-vid for compatibility with libvirt */ - return 0; -} - static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@@ -536,10 -641,29 +536,10 @@@ static const struct net_device_ops mlx5 .ndo_change_carrier = mlx5e_rep_change_carrier, };
-static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { - .ndo_open = mlx5e_open, - .ndo_stop = mlx5e_close, - .ndo_start_xmit = mlx5e_xmit, - .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, - .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, - .ndo_get_stats64 = mlx5e_get_stats, - .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, - .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_uplink_rep_change_mtu, - .ndo_features_check = mlx5e_features_check, - .ndo_set_vf_mac = mlx5e_set_vf_mac, - .ndo_set_vf_rate = mlx5e_set_vf_rate, - .ndo_get_vf_config = mlx5e_get_vf_config, - .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, - .ndo_set_features = mlx5e_set_features, -}; - bool mlx5e_eswitch_uplink_rep(struct net_device *netdev) { - return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep; + return netdev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_is_uplink_rep(netdev_priv(netdev)); }
bool mlx5e_eswitch_vf_rep(struct net_device *netdev) @@@ -589,15 -713,26 +589,15 @@@ static void mlx5e_build_rep_params(stru }
static void mlx5e_build_rep_netdev(struct net_device *netdev, - struct mlx5_core_dev *mdev, - struct mlx5_eswitch_rep *rep) + struct mlx5_core_dev *mdev) { SET_NETDEV_DEV(netdev, mdev->device); - if (rep->vport == MLX5_VPORT_UPLINK) { - netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; - /* we want a persistent mac for the uplink rep */ - mlx5_query_mac_address(mdev, netdev->dev_addr); - netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; - mlx5e_dcbnl_build_rep_netdev(netdev); - } else { - netdev->netdev_ops = &mlx5e_netdev_ops_rep; - eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; - } + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->watchdog_timeo = 15 * HZ;
- netdev->features |= NETIF_F_NETNS_LOCAL; - #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; #endif @@@ -609,9 -744,12 +609,9 @@@ netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM;
- if (rep->vport == MLX5_VPORT_UPLINK) - netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - else - netdev->features |= NETIF_F_VLAN_CHALLENGED; - netdev->features |= netdev->hw_features; + netdev->features |= NETIF_F_VLAN_CHALLENGED; + netdev->features |= NETIF_F_NETNS_LOCAL; }
static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@@ -752,7 -890,6 +752,7 @@@ int mlx5e_rep_bond_update(struct mlx5e_ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err;
mlx5e_init_l2_addr(priv); @@@ -767,7 -904,7 +767,7 @@@ if (err) goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts;
@@@ -775,7 -912,7 +775,7 @@@ if (err) goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs;
@@@ -800,11 -937,11 +800,11 @@@ err_destroy_root_ft err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@@ -814,15 -951,13 +814,15 @@@
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@@ -972,22 -1107,15 +972,23 @@@ static void mlx5e_uplink_rep_enable(str
mlx5e_rep_tc_enable(priv);
- mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, - 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); + if (MLX5_CAP_GEN(mdev, uplink_follow)) + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); mlx5e_rep_neigh_init(rpriv); + + netdev->wanted_features |= NETIF_F_HW_TC; + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); }
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) @@@ -995,12 -1123,6 +996,12 @@@ struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev;
+ rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); @@@ -1061,7 -1183,6 +1062,7 @@@ static const struct mlx5e_profile mlx5e .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, + .rx_ptp_support = false, };
static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@@ -1078,65 -1199,33 +1079,65 @@@ .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = MLX5E_MAX_NUM_TC, - .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + /* XSK is needed so we can replace profile with NIC netdev */ + .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, + .rx_ptp_support = false, };
/* e-Switch vport representors */ static int -mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev)); + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct devlink_port *dl_port; + int err; + + rpriv->netdev = priv->netdev; + + err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, + rpriv); + if (err) + return err; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, rpriv->netdev); + + return 0; +} + +static void +mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) +{ + struct net_device *netdev = rpriv->netdev; + struct devlink_port *dl_port; + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + dev = priv->mdev; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); + mlx5e_netdev_attach_nic_profile(priv); +} + +static int +mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); const struct mlx5e_profile *profile; - struct mlx5e_rep_priv *rpriv; struct devlink_port *dl_port; struct net_device *netdev; struct mlx5e_priv *priv; unsigned int txqs, rxqs; int nch, err;
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); - if (!rpriv) - return -ENOMEM; - - /* rpriv->rep to be looked up when profile->init() is called */ - rpriv->rep = rep; - - profile = (rep->vport == MLX5_VPORT_UPLINK) ? - &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; - + profile = &mlx5e_rep_profile; nch = mlx5e_get_max_num_channels(dev); txqs = nch * profile->max_tc; rxqs = nch * profile->rq_groups; @@@ -1145,11 -1234,21 +1146,11 @@@ mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", rep->vport); - kfree(rpriv); return -EINVAL; }
- mlx5e_build_rep_netdev(netdev, dev, rep); - + mlx5e_build_rep_netdev(netdev, dev); rpriv->netdev = netdev; - rep->rep_data[REP_ETH].priv = rpriv; - INIT_LIST_HEAD(&rpriv->vport_sqs_list); - - if (rep->vport == MLX5_VPORT_UPLINK) { - err = mlx5e_create_mdev_resources(dev); - if (err) - goto err_destroy_netdev; - }
priv = netdev_priv(netdev); priv->profile = profile; @@@ -1157,7 -1256,7 +1158,7 @@@ err = profile->init(dev, netdev); if (err) { netdev_warn(netdev, "rep profile init failed, %d\n", err); - goto err_destroy_mdev_resources; + goto err_destroy_netdev; }
err = mlx5e_attach_netdev(netdev_priv(netdev)); @@@ -1187,34 -1286,13 +1188,34 @@@ err_detach_netdev err_cleanup_profile: priv->profile->cleanup(priv);
-err_destroy_mdev_resources: - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_destroy_mdev_resources(dev); - err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); - kfree(rpriv); + return err; +} + +static int +mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + int err; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + rep->rep_data[REP_ETH].priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + + if (rep->vport == MLX5_VPORT_UPLINK) + err = mlx5e_vport_uplink_rep_load(dev, rep); + else + err = mlx5e_vport_vf_rep_load(dev, rep); + + if (err) + kfree(rpriv); + return err; }
@@@ -1228,19 -1306,15 +1229,19 @@@ mlx5e_vport_rep_unload(struct mlx5_eswi struct devlink_port *dl_port; void *ppriv = priv->ppriv;
+ if (rep->vport == MLX5_VPORT_UPLINK) { + mlx5e_vport_uplink_rep_unload(rpriv); + goto free_ppriv; + } + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); if (dl_port) devlink_port_type_clear(dl_port); unregister_netdev(netdev); mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); +free_ppriv: kfree(ppriv); /* mlx5e_rep_priv */ }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f67e51d8291a,88a01c59ce61..ae0570ea08bf --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@@ -116,7 -116,6 +116,6 @@@ static const struct counter_desc sw_sta #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, @@@ -180,8 -179,6 +179,6 @@@ #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_ctx) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_del) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) }, @@@ -342,8 -339,6 +339,6 @@@ static void mlx5e_stats_grp_sw_update_s #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; - s->rx_tls_ctx += rq_stats->tls_ctx; - s->rx_tls_del += rq_stats->tls_del; s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt; s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start; s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end; @@@ -390,7 -385,6 +385,6 @@@ static void mlx5e_stats_grp_sw_update_s #ifdef CONFIG_MLX5_EN_TLS s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; - s->tx_tls_ctx += sq_stats->tls_ctx; s->tx_tls_ooo += sq_stats->tls_ooo; s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; s->tx_tls_dump_packets += sq_stats->tls_dump_packets; @@@ -407,21 -401,13 +401,21 @@@ static void mlx5e_stats_grp_sw_update_s { int i;
- if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return;
- mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->port_ptp_stats.ch); + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
- for (i = 0; i < priv->max_opened_tc; i++) { - mlx5e_stats_grp_sw_update_stats_sq(s, &priv->port_ptp_stats.sq[i]); + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + if (priv->rx_ptp_opened) { + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq);
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ barrier(); @@@ -1630,8 -1616,6 +1624,6 @@@ static const struct counter_desc rq_sta #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_ctx) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_del) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) }, @@@ -1658,7 -1642,6 +1650,6 @@@ static const struct counter_desc sq_sta #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@@ -1768,38 -1751,6 +1759,38 @@@ static const struct counter_desc ptp_cq { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, };
+static const struct counter_desc ptp_rq_stats_desc[] = { + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, +}; + static const struct counter_desc qos_sq_stats_desc[] = { { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, @@@ -1816,7 -1767,6 +1807,6 @@@ #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@@ -1845,7 -1795,6 +1835,7 @@@ #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc) #define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) @@@ -1892,46 -1841,32 +1882,46 @@@ static MLX5E_DECLARE_STATS_GRP_OP_UPDAT
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { - return priv->port_ptp_opened ? - NUM_PTP_CH_STATS + - ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) : - 0; + int num = NUM_PTP_CH_STATS; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return 0; + + if (priv->tx_ptp_opened) + num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc; + if (priv->rx_ptp_opened) + num += NUM_PTP_RQ_STATS; + + return num; }
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) { int i, tc;
- if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx;
for (i = 0; i < NUM_PTP_CH_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, ptp_ch_stats_desc[i].format);
- for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_sq_stats_desc[i].format, tc); + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_sq_stats_desc[i].format, tc);
- for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_cq_stats_desc[i].format, tc); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_cq_stats_desc[i].format, tc); + ptp_rq_stats_desc[i].format); + } return idx; }
@@@ -1939,33 -1874,26 +1929,33 @@@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_ { int i, tc;
- if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx;
for (i = 0; i < NUM_PTP_CH_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.ch, + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, ptp_ch_stats_desc, i);
- for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.sq[tc], - ptp_sq_stats_desc, i); - - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i); + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.cq[tc], - ptp_cq_stats_desc, i); - + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, + ptp_rq_stats_desc, i); + } return idx; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index ca398eac09c1,adf9b7b8b712..21d3b8747f93 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@@ -54,7 -54,6 +54,7 @@@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
@@@ -192,7 -191,6 +192,6 @@@ struct mlx5e_sw_stats #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; - u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_dump_packets; u64 tx_tls_dump_bytes; @@@ -203,8 -201,6 +202,6 @@@
u64 rx_tls_decrypted_packets; u64 rx_tls_decrypted_bytes; - u64 rx_tls_ctx; - u64 rx_tls_del; u64 rx_tls_resync_req_pkt; u64 rx_tls_resync_req_start; u64 rx_tls_resync_req_end; @@@ -335,8 -331,6 +332,6 @@@ struct mlx5e_rq_stats #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; - u64 tls_ctx; - u64 tls_del; u64 tls_resync_req_pkt; u64 tls_resync_req_start; u64 tls_resync_req_end; @@@ -365,7 -359,6 +360,6 @@@ struct mlx5e_sq_stats #ifdef CONFIG_MLX5_EN_TLS u64 tls_encrypted_packets; u64 tls_encrypted_bytes; - u64 tls_ctx; u64 tls_ooo; u64 tls_dump_packets; u64 tls_dump_bytes; diff --combined drivers/net/ethernet/mellanox/mlx5/core/eq.c index 4e8381030d77,1fa9c18563da..77c0ca655975 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@@ -271,7 -271,7 +271,7 @@@ static void init_eq_buf(struct mlx5_eq struct mlx5_eqe *eqe; int i;
- for (i = 0; i < eq->nent; i++) { + for (i = 0; i < eq_get_size(eq); i++) { eqe = get_eqe(eq, i); eqe->owner = MLX5_EQE_OWNER_INIT_VAL; } @@@ -281,10 -281,8 +281,10 @@@ static in create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { + u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE); struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; + u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; u8 vecidx = param->irq_index; __be64 *pas; @@@ -299,18 -297,16 +299,18 @@@ spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
- eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; - err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); + + err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride), + &eq->frag_buf, dev->priv.numa_node); if (err) return err;
+ mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) + - MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL); if (!in) { @@@ -319,7 -315,7 +319,7 @@@ }
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); - mlx5_fill_page_array(&eq->buf, pas); + mlx5_fill_page_frag_array(&eq->frag_buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) @@@ -330,11 -326,11 +330,11 @@@ param->mask[i]);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); - MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); + MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); MLX5_SET(eqc, eqc, uar_page, priv->uar->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, - eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) @@@ -360,7 -356,7 +360,7 @@@ err_in kvfree(in);
err_buf: - mlx5_buf_free(dev, &eq->buf); + mlx5_frag_buf_free(dev, &eq->frag_buf); return err; }
@@@ -417,7 -413,7 +417,7 @@@ static int destroy_unmap_eq(struct mlx5 eq->eqn); synchronize_irq(eq->irqn);
- mlx5_buf_free(dev, &eq->buf); + mlx5_frag_buf_free(dev, &eq->frag_buf);
return err; } @@@ -768,11 -764,10 +768,11 @@@ EXPORT_SYMBOL(mlx5_eq_destroy_generic) struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) { u32 ci = eq->cons_index + cc; + u32 nent = eq_get_size(eq); struct mlx5_eqe *eqe;
- eqe = get_eqe(eq, ci & (eq->nent - 1)); - eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + eqe = get_eqe(eq, ci & (nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe; /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@@ -936,13 -931,24 +936,24 @@@ void mlx5_core_eq_free_irqs(struct mlx5 mutex_unlock(&table->lock); }
+ #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + #define MLX5_MAX_ASYNC_EQS 4 + #else + #define MLX5_MAX_ASYNC_EQS 3 + #endif + int mlx5_eq_table_create(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); int err;
eq_table->num_comp_eqs = - mlx5_irq_get_num_comp(eq_table->irq_table); + min_t(int, + mlx5_irq_get_num_comp(eq_table->irq_table), + num_eqs - MLX5_MAX_ASYNC_EQS);
err = create_async_eqs(dev); if (err) { diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8b82f44bd6a7,d4a2f8d1ee9f..ab32f685cbb7 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -40,6 -40,7 +40,6 @@@ #include "eswitch.h" #include "esw/indir_table.h" #include "esw/acl/ofld.h" -#include "esw/indir_table.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@@ -47,7 -48,6 +47,7 @@@ #include "lib/eq.h" #include "lib/fs_chains.h" #include "en_tc.h" +#include "en/mapping.h"
/* There are two match-all miss flows, one for unicast dst mac and * one for multicast. @@@ -55,14 -55,184 +55,14 @@@ #define MLX5_ESW_MISS_FLOWS (2) #define UPLINK_REP_INDEX 0
-/* Per vport tables */ - -#define MLX5_ESW_VPORT_TABLE_SIZE 128 - -/* This struct is used as a key to the hash table and we need it to be packed - * so hash result is consistent - */ -struct mlx5_vport_key { - u32 chain; - u16 prio; - u16 vport; - u16 vhca_id; -} __packed; - -struct mlx5_vport_tbl_attr { - u16 chain; - u16 prio; - u16 vport; -}; - -struct mlx5_vport_table { - struct hlist_node hlist; - struct mlx5_flow_table *fdb; - u32 num_rules; - struct mlx5_vport_key key; -}; - +#define MLX5_ESW_VPORT_TBL_SIZE 128 #define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4
-static struct mlx5_flow_table * -esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *fdb; - - ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS; - ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE; - ft_attr.prio = FDB_PER_VPORT; - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", - PTR_ERR(fdb)); - } - - return fdb; -} - -static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, - struct mlx5_vport_tbl_attr *attr, - struct mlx5_vport_key *key) -{ - key->vport = attr->vport; - key->chain = attr->chain; - key->prio = attr->prio; - key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); - return jhash(key, sizeof(*key), 0); -} - -/* caller must hold vports.lock */ -static struct mlx5_vport_table * -esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) -{ - struct mlx5_vport_table *e; - - hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) - if (!memcmp(&e->key, skey, sizeof(*skey))) - return e; - - return NULL; -} - -static void -esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) -{ - struct mlx5_vport_table *e; - struct mlx5_vport_key key; - u32 hkey; - - mutex_lock(&esw->fdb_table.offloads.vports.lock); - hkey = flow_attr_to_vport_key(esw, attr, &key); - e = esw_vport_tbl_lookup(esw, &key, hkey); - if (!e || --e->num_rules) - goto out; - - hash_del(&e->hlist); - mlx5_destroy_flow_table(e->fdb); - kfree(e); -out: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); -} - -static struct mlx5_flow_table * -esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) -{ - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *fdb; - struct mlx5_vport_table *e; - struct mlx5_vport_key skey; - u32 hkey; - - mutex_lock(&esw->fdb_table.offloads.vports.lock); - hkey = flow_attr_to_vport_key(esw, attr, &skey); - e = esw_vport_tbl_lookup(esw, &skey, hkey); - if (e) { - e->num_rules++; - goto out; - } - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) { - fdb = ERR_PTR(-ENOMEM); - goto err_alloc; - } - - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!ns) { - esw_warn(dev, "Failed to get FDB namespace\n"); - fdb = ERR_PTR(-ENOENT); - goto err_ns; - } - - fdb = esw_vport_tbl_create(esw, ns); - if (IS_ERR(fdb)) - goto err_ns; - - e->fdb = fdb; - e->num_rules = 1; - e->key = skey; - hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); -out: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - return e->fdb; - -err_ns: - kfree(e); -err_alloc: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - return fdb; -} - -int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) -{ - struct mlx5_vport_tbl_attr attr; - struct mlx5_flow_table *fdb; - struct mlx5_vport *vport; - int i; - - attr.chain = 0; - attr.prio = 1; - mlx5_esw_for_all_vports(esw, i, vport) { - attr.vport = vport->vport; - fdb = esw_vport_tbl_get(esw, &attr); - if (IS_ERR(fdb)) - goto out; - } - return 0; - -out: - mlx5_esw_vport_tbl_put(esw); - return PTR_ERR(fdb); -} - -void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) -{ - struct mlx5_vport_tbl_attr attr; - struct mlx5_vport *vport; - int i; - - attr.chain = 0; - attr.prio = 1; - mlx5_esw_for_all_vports(esw, i, vport) { - attr.vport = vport->vport; - esw_vport_tbl_put(esw, &attr); - } -} - -/* End: Per vport tables */ +static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE, + .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, + .flags = 0, +};
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) @@@ -86,26 -256,6 +86,26 @@@ mlx5_eswitch_set_rule_flow_source(struc MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; }
+/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits + * are not needed as well in the following process. So clear them all for simplicity. + */ +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec) +{ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + void *misc2; + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2))) + spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2; + } +} + static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@@ -176,19 -326,6 +176,19 @@@ esw_cleanup_decap_indir(struct mlx5_esw true); }
+static int +esw_setup_sampler_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_esw_flow_attr *esw_attr, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest[i].sampler_id = esw_attr->sample->sampler_id; + + return 0; +} + static int esw_setup_ft_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@@ -400,6 -537,14 +400,14 @@@ esw_setup_vport_dests(struct mlx5_flow_ return i; }
+ static bool + esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) + { + return MLX5_CAP_GEN(esw->dev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); + } + static int esw_setup_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@@ -413,15 -558,10 +421,13 @@@ int err = 0;
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && - MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && - mlx5_eswitch_vport_match_metadata_enabled(esw) && - MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + esw_src_port_rewrite_supported(esw)) attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
- if (attr->dest_ft) { + if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { + esw_setup_sampler_dest(dest, flow_act, esw_attr, *i); + (*i)++; + } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { @@@ -524,16 -664,12 +530,16 @@@ mlx5_eswitch_add_offloaded_rule(struct if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr;
- if (split) { + /* esw_attr->sample is allocated only when there is a sample action */ + if (esw_attr->sample && esw_attr->sample->sample_default_tbl) { + fdb = esw_attr->sample->sample_default_tbl; + } else if (split) { fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
- fdb = esw_vport_tbl_get(esw, &fwd_attr); + fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); } else { if (attr->chain || attr->prio) fdb = mlx5_chains_get_table(chains, attr->chain, @@@ -565,7 -701,7 +571,7 @@@
err_add_rule: if (split) - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: @@@ -598,8 -734,7 +604,8 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; - fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr); + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@@ -644,7 -779,7 +650,7 @@@ return rule; err_chain_src_rewrite: esw_put_dest_tables_loop(esw, attr, 0, i); - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_get_fast: @@@ -679,16 -814,15 +685,16 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; }
if (fwd_rule) { - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count); } else { if (split) - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); esw_cleanup_dests(esw, attr); @@@ -909,8 -1043,7 +915,8 @@@ out }
struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch_rep *rep, u32 sqn) { struct mlx5_flow_act flow_act = {0}; @@@ -928,30 -1061,21 +934,30 @@@ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(rep->esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id);
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = vport; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) - esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); + esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", + PTR_ERR(flow_rule)); out: kvfree(spec); return flow_rule; @@@ -1329,14 -1453,14 +1335,14 @@@ esw_add_restore_rule(struct mlx5_eswitc if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) return ERR_PTR(-EOPNOTSUPP);
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM);
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - ESW_CHAIN_TAG_METADATA_MASK); + ESW_REG_C0_USER_DATA_METADATA_MASK); misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); @@@ -1352,7 -1476,7 +1358,7 @@@ dest.ft = esw->offloads.ft_offloads;
flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - kfree(spec); + kvfree(spec);
if (IS_ERR(flow_rule)) esw_warn(esw->dev, @@@ -1362,6 -1486,12 +1368,6 @@@ return flow_rule; }
-u32 -esw_get_max_restore_tag(struct mlx5_eswitch *esw) -{ - return ESW_CHAIN_TAG_METADATA_MASK; -} - #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32
@@@ -1391,44 -1521,6 +1397,44 @@@ static void esw_set_flow_group_source_p }
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static void esw_vport_tbl_put(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_vport *vport; + int i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + mlx5_esw_vporttbl_put(esw, &attr); + } +} + +static int esw_vport_tbl_get(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + int i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fdb = mlx5_esw_vporttbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + +out: + esw_vport_tbl_put(esw); + return PTR_ERR(fdb); +} + #define fdb_modify_header_fwd_to_table_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) @@@ -1478,7 -1570,7 +1484,7 @@@ esw_chains_create(struct mlx5_eswitch * attr.max_ft_sz = fdb_max; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; - attr.max_restore_tag = esw_get_max_restore_tag(esw); + attr.mapping = esw->offloads.reg_c0_obj_pool;
chains = mlx5_chains_create(dev, &attr); if (IS_ERR(chains)) { @@@ -1506,7 -1598,7 +1512,7 @@@
/* Open level 1 for split fdb rules now if prios isn't supported */ if (!mlx5_chains_prios_supported(chains)) { - err = mlx5_esw_vport_tbl_get(esw); + err = esw_vport_tbl_get(esw); if (err) goto level_1_err; } @@@ -1530,7 -1622,7 +1536,7 @@@ static voi esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) { if (!mlx5_chains_prios_supported(chains)) - mlx5_esw_vport_tbl_put(esw); + esw_vport_tbl_put(esw); mlx5_chains_put_table(chains, 0, 1, 0); mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); mlx5_chains_destroy(chains); @@@ -1617,12 -1709,6 +1623,12 @@@ static int esw_create_offloads_fdb_tabl
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + }
ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@@ -1636,36 -1722,40 +1642,40 @@@ } esw->fdb_table.offloads.send_to_vport_grp = g;
- /* meta send to vport */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); + if (esw_src_port_rewrite_supported(esw)) { + /* meta send to vport */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); - - num_vfs = esw->esw_funcs.num_vfs; - if (num_vfs) { - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1); - ix += num_vfs; - - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", - err); - goto send_vport_meta_err; + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + num_vfs = esw->esw_funcs.num_vfs; + if (num_vfs) { + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, ix + num_vfs - 1); + ix += num_vfs; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", + err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); + if (err) + goto meta_rule_err; } - esw->fdb_table.offloads.send_to_vport_meta_grp = g; - - err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); - if (err) - goto meta_rule_err; }
if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { @@@ -1775,7 -1865,6 +1785,7 @@@ static void esw_destroy_offloads_fdb_ta /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); }
static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@@ -1978,7 -2067,7 +1988,7 @@@ static int esw_create_restore_table(str goto out_free; }
- ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS; ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { err = PTR_ERR(ft); @@@ -1993,7 -2082,7 +2003,7 @@@ misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - ESW_CHAIN_TAG_METADATA_MASK); + ESW_REG_C0_USER_DATA_METADATA_MASK); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft_attr.max_fte - 1); @@@ -2181,11 -2270,9 +2191,11 @@@ int esw_offloads_load_rep(struct mlx5_e if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0;
- err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); - if (err) - return err; + if (vport_num != MLX5_VPORT_UPLINK) { + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + }
err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) @@@ -2193,8 -2280,7 +2203,8 @@@ return err;
load_err: - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); return err; }
@@@ -2204,9 -2290,7 +2214,9 @@@ void esw_offloads_unload_rep(struct mlx return;
mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); }
#define ESW_OFFLOADS_DEVCOM_PAIR (0) @@@ -2215,8 -2299,13 +2225,8 @@@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw) { - int err; - - err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); - if (err) - return err;
- return 0; + return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); }
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) @@@ -2476,9 -2565,6 +2486,9 @@@ static int esw_create_uplink_offloads_a struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return PTR_ERR(vport); + return esw_vport_create_offloads_acl_tables(esw, vport); }
@@@ -2487,9 -2573,6 +2497,9 @@@ static void esw_destroy_uplink_offloads struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return; + esw_vport_destroy_offloads_acl_tables(esw, vport); }
@@@ -2501,7 -2584,6 +2511,7 @@@ static int esw_offloads_steering_init(s memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0);
indir = mlx5_esw_indir_table_init(); if (IS_ERR(indir)) { @@@ -2646,7 -2728,6 +2656,7 @@@ static int mlx5_esw_host_number_init(st
int esw_offloads_enable(struct mlx5_eswitch *esw) { + struct mapping_ctx *reg_c0_obj_pool; struct mlx5_vport *vport; int err, i;
@@@ -2674,15 -2755,6 +2684,15 @@@ if (err) goto err_vport_metadata;
+ reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + if (IS_ERR(reg_c0_obj_pool)) { + err = PTR_ERR(reg_c0_obj_pool); + goto err_pool; + } + esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool; + err = esw_offloads_steering_init(esw); if (err) goto err_steering_init; @@@ -2709,8 -2781,6 +2719,8 @@@ err_vports err_uplink: esw_offloads_steering_cleanup(esw); err_steering_init: + mapping_destroy(reg_c0_obj_pool); +err_pool: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_metadata_uninit(esw); @@@ -2749,7 -2819,6 +2759,7 @@@ void esw_offloads_disable(struct mlx5_e esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); @@@ -2856,14 -2925,8 +2866,14 @@@ int mlx5_devlink_eswitch_mode_set(struc if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL;
- mutex_lock(&esw->mode_lock); - cur_mlx5_mode = esw->mode; + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + return err; + } + cur_mlx5_mode = err; + err = 0; + if (cur_mlx5_mode == mlx5_mode) goto unlock;
@@@ -2875,7 -2938,7 +2885,7 @@@ err = -EINVAL;
unlock: - mutex_unlock(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; }
@@@ -2888,14 -2951,14 +2898,14 @@@ int mlx5_devlink_eswitch_mode_get(struc if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_mode_to_devlink(esw->mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -2911,7 -2974,7 +2921,7 @@@ int mlx5_devlink_eswitch_inline_mode_se if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@@ -2950,7 -3013,7 +2960,7 @@@ }
esw->offloads.inline_mode = mlx5_mode; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0;
revert_inline_mode: @@@ -2960,7 -3023,7 +2970,7 @@@ vport, esw->offloads.inline_mode); out: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -2973,14 -3036,14 +2983,14 @@@ int mlx5_devlink_eswitch_inline_mode_ge if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -2996,7 -3059,7 +3006,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@@ -3042,7 -3105,7 +3052,7 @@@ }
unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3057,14 -3120,14 +3067,14 @@@ int mlx5_devlink_eswitch_encap_mode_get return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
*encap = esw->offloads.encap; unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; }
@@@ -3094,7 -3157,6 +3104,7 @@@ void mlx5_eswitch_register_vport_reps(s esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { if (likely(mlx5_eswitch_vport_has_rep(esw, i))) { + rep->esw = esw; rep_data = &rep->rep_data[rep_type]; atomic_set(&rep_data->state, REP_REGISTERED); } diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 97d074d7b78d,ba28ac7e79bc..f99db88ee884 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@@ -16,12 -16,12 +16,13 @@@ #include <linux/in6.h> #include <linux/notifier.h> #include <linux/net_namespace.h> +#include <linux/spinlock.h> #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> #include <net/vxlan.h> #include <net/flow_offload.h> + #include <net/inet_ecn.h>
#include "port.h" #include "core.h" @@@ -87,15 -87,10 +88,15 @@@ enum mlxsw_sp_rif_type MLXSW_SP_RIF_TYPE_MAX, };
-struct mlxsw_sp_rif_ops; +struct mlxsw_sp_router_ops;
-extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[]; -extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[]; +extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops; +extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops; + +struct mlxsw_sp_switchdev_ops; + +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops; +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops;
enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_8021Q, @@@ -139,7 -134,6 +140,7 @@@ struct mlxsw_sp_ptp_state struct mlxsw_sp_ptp_ops; struct mlxsw_sp_span_ops; struct mlxsw_sp_qdisc_state; +struct mlxsw_sp_mall_entry;
struct mlxsw_sp_port_mapping { u8 module; @@@ -155,7 -149,6 +156,7 @@@ struct mlxsw_sp const unsigned char *mac_mask; struct mlxsw_sp_upper *lags; struct mlxsw_sp_port_mapping **port_mapping; + struct rhashtable sample_trigger_ht; struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; @@@ -172,7 -165,6 +173,7 @@@ struct mlxsw_sp_counter_pool *counter_pool; struct mlxsw_sp_span *span; struct mlxsw_sp_trap *trap; + const struct mlxsw_sp_switchdev_ops *switchdev_ops; const struct mlxsw_sp_kvdl_ops *kvdl_ops; const struct mlxsw_afa_ops *afa_ops; const struct mlxsw_afk_ops *afk_ops; @@@ -180,6 -172,7 +181,6 @@@ const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; - const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_sb_ops *sb_ops; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; @@@ -187,8 -180,6 +188,8 @@@ const struct mlxsw_sp_span_ops *span_ops; const struct mlxsw_sp_policer_core_ops *policer_core_ops; const struct mlxsw_sp_trap_ops *trap_ops; + const struct mlxsw_sp_mall_ops *mall_ops; + const struct mlxsw_sp_router_ops *router_ops; const struct mlxsw_listener *listeners; size_t listeners_count; u32 lowest_shaper_bs; @@@ -242,18 -233,7 +243,18 @@@ struct mlxsw_sp_port_pcpu_stats u32 tx_dropped; };
-struct mlxsw_sp_port_sample { +enum mlxsw_sp_sample_trigger_type { + MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, +}; + +struct mlxsw_sp_sample_trigger { + enum mlxsw_sp_sample_trigger_type type; + u8 local_port; /* Reserved when trigger type is not ingress / egress. */ +}; + +struct mlxsw_sp_sample_params { struct psample_group *psample_group; u32 trunc_size; u32 rate; @@@ -323,6 -303,7 +324,6 @@@ struct mlxsw_sp_port struct mlxsw_sp_port_xstats xstats; struct delayed_work update_dw; } periodic_hw_stats; - struct mlxsw_sp_port_sample __rcu *sample; struct list_head vlans_list; struct mlxsw_sp_port_vlan *default_vlan; struct mlxsw_sp_qdisc_state *qdisc; @@@ -367,6 -348,20 +368,20 @@@ struct mlxsw_sp_port_type_speed_ops u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap); };
+ static inline u8 mlxsw_sp_tunnel_ecn_decap(u8 outer_ecn, u8 inner_ecn, + bool *trap_en) + { + bool set_ce = false; + + *trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + if (set_ce) + return INET_ECN_CE; + else if (outer_ecn == INET_ECN_ECT_1 && inner_ecn == INET_ECN_ECT_0) + return INET_ECN_ECT_1; + else + return inner_ecn; + } + static inline struct net_device * mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) { @@@ -551,17 -546,6 +566,17 @@@ void mlxsw_sp_hdroom_bufs_reset_sizes(s struct mlxsw_sp_hdroom *hdroom); int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, const struct mlxsw_sp_hdroom *hdroom); +struct mlxsw_sp_sample_params * +mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger); +int +mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params, + struct netlink_ext_ack *extack); +void +mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger);
extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; @@@ -599,6 -583,8 +614,6 @@@ void mlxsw_sp_rx_listener_no_mark_func( u8 local_port, void *priv); void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u8 local_port); -void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port); int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, @@@ -615,8 -601,6 +630,8 @@@ int mlxsw_sp_port_vp_mode_set(struct ml int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, bool learn_enable); int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type); +int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ethtype); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u16 ethtype); struct mlxsw_sp_port_vlan * @@@ -955,12 -939,6 +970,12 @@@ int mlxsw_sp_acl_rulei_act_count(struc int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u16 fid, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, + struct netlink_ext_ack *extack);
struct mlxsw_sp_acl_rule;
@@@ -1070,19 -1048,6 +1085,19 @@@ extern const struct mlxsw_afk_ops mlxsw extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
/* spectrum_matchall.c */ +struct mlxsw_sp_mall_ops { + int (*sample_add)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack); + void (*sample_del)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry); +}; + +extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops; +extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops; + enum mlxsw_sp_mall_action_type { MLXSW_SP_MALL_ACTION_TYPE_MIRROR, MLXSW_SP_MALL_ACTION_TYPE_SAMPLE, @@@ -1098,11 -1063,6 +1113,11 @@@ struct mlxsw_sp_mall_trap_entry int span_id; };
+struct mlxsw_sp_mall_sample_entry { + struct mlxsw_sp_sample_params params; + int span_id; /* Relevant for Spectrum-2 onwards. */ +}; + struct mlxsw_sp_mall_entry { struct list_head list; unsigned long cookie; @@@ -1112,7 -1072,7 +1127,7 @@@ union { struct mlxsw_sp_mall_mirror_entry mirror; struct mlxsw_sp_mall_trap_entry trap; - struct mlxsw_sp_port_sample sample; + struct mlxsw_sp_mall_sample_entry sample; }; struct rcu_head rcu; }; @@@ -1123,8 -1083,7 +1138,8 @@@ int mlxsw_sp_mall_replace(struct mlxsw_ void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block, struct tc_cls_matchall_offload *f); int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block, - struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block, struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index, diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index b8b08a6a1d10,64a8f838eb53..5facabd86882 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@@ -127,16 -127,14 +127,16 @@@ bool mlxsw_sp_l3addr_is_zero(union mlxs
static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry) + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); - char ratr_pl[MLXSW_REG_RATR_LEN]; + enum mlxsw_reg_ratr_op op;
- mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_IPIP, + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP, adj_index, rif_index); mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
@@@ -337,12 -335,11 +337,11 @@@ static int mlxsw_sp_ipip_ecn_decap_init u8 inner_ecn, u8 outer_ecn) { char tidem_pl[MLXSW_REG_TIDEM_LEN]; - bool trap_en, set_ce = false; u8 new_inner_ecn; + bool trap_en;
- trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); - new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; - + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); diff --combined drivers/net/ethernet/microchip/lan743x_main.c index e7ab5f3f73fd,7b6794aa8ea9..11a1dc4c436d --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@@ -885,8 -885,8 +885,8 @@@ static int lan743x_mac_set_mtu(struct l }
mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_); - mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) & - MAC_RX_MAX_SIZE_MASK_); + mac_rx |= (((new_mtu + ETH_HLEN + ETH_FCS_LEN) + << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_); lan743x_csr_write(adapter, MAC_RX, mac_rx);
if (enabled) { @@@ -1944,7 -1944,7 +1944,7 @@@ static int lan743x_rx_init_ring_element struct sk_buff *skb; dma_addr_t dma_ptr;
- buffer_length = netdev->mtu + ETH_HLEN + 4 + RX_HEAD_PADDING; + buffer_length = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + RX_HEAD_PADDING;
descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; @@@ -2040,7 -2040,7 +2040,7 @@@ lan743x_rx_trim_skb(struct sk_buff *skb dev_kfree_skb_irq(skb); return NULL; } - frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4); + frame_length = max_t(int, 0, frame_length - ETH_FCS_LEN); if (skb->len > frame_length) { skb->tail -= skb->len - frame_length; skb->len = frame_length; @@@ -3004,7 -3004,7 +3004,7 @@@ static int lan743x_pm_suspend(struct de lan743x_pm_set_wol(adapter);
/* Host sets PME_En, put D3hot */ - return pci_prepare_to_sleep(pdev);; + return pci_prepare_to_sleep(pdev); }
static int lan743x_pm_resume(struct device *dev) diff --combined drivers/net/ethernet/netronome/nfp/flower/main.h index e13e26e72ca0,56833a41f3d2..31377923ea3d --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@@ -47,7 -47,6 +47,7 @@@ struct nfp_app #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) #define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_VLAN_QINQ BIT(8) +#define NFP_FL_FEATS_QOS_PPS BIT(9) #define NFP_FL_FEATS_HOST_ACK BIT(31)
#define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@@ -62,8 -61,7 +62,8 @@@ NFP_FL_FEATS_FLOW_MOD | \ NFP_FL_FEATS_PRE_TUN_RULES | \ NFP_FL_FEATS_IPV6_TUN | \ - NFP_FL_FEATS_VLAN_QINQ) + NFP_FL_FEATS_VLAN_QINQ | \ + NFP_FL_FEATS_QOS_PPS)
struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@@ -192,6 -190,7 +192,7 @@@ struct nfp_fl_internal_ports * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded + * @merge_table: Hash table to store merged flows */ struct nfp_flower_priv { struct nfp_app *app; @@@ -225,6 -224,7 +226,7 @@@ unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ int pre_tun_rule_cnt; + struct rhashtable merge_table; };
/** @@@ -352,6 -352,12 +354,12 @@@ struct nfp_fl_payload_link };
extern const struct rhashtable_params nfp_flower_table_params; + extern const struct rhashtable_params merge_table_params; + + struct nfp_merge_info { + u64 parent_ctx; + struct rhash_head ht_node; + };
struct nfp_fl_stats_frame { __be32 stats_con_id; diff --combined drivers/net/ethernet/xilinx/xilinx_axienet.h index 708769349f76,aca7f82f6791..5b4d153b1492 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@@ -376,8 -376,6 +376,8 @@@ struct axidma_bd struct sk_buff *skb; } __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT);
+#define XAE_NUM_MISC_CLOCKS 3 + /** * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. @@@ -387,8 -385,7 +387,8 @@@ * @phylink_config: phylink configuration settings * @pcs_phy: Reference to PCS/PMA PHY if used * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core - * @clk: Clock for AXI bus + * @axi_clk: AXI4-Lite bus clock + * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks) * @mii_bus: Pointer to MII bus structure * @mii_clk_div: MII bus clock divider value * @regs_start: Resource start for axienet device addresses @@@ -437,8 -434,7 +437,8 @@@ struct axienet_local
bool switch_x_sgmii;
- struct clk *clk; + struct clk *axi_clk; + struct clk_bulk_data misc_clks[XAE_NUM_MISC_CLOCKS];
struct mii_bus *mii_bus; u8 mii_clk_div; @@@ -508,6 -504,18 +508,18 @@@ static inline u32 axinet_ior_read_mcr(s return axienet_ior(lp, XAE_MDIO_MCR_OFFSET); }
+ static inline void axienet_lock_mii(struct axienet_local *lp) + { + if (lp->mii_bus) + mutex_lock(&lp->mii_bus->mdio_lock); + } + + static inline void axienet_unlock_mii(struct axienet_local *lp) + { + if (lp->mii_bus) + mutex_unlock(&lp->mii_bus->mdio_lock); + } + /** * axienet_iow - Memory mapped Axi Ethernet register write * @lp: Pointer to axienet local structure diff --combined drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 92cf9051d557,f8f8654ea728..feb1aa4ec927 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@@ -1053,9 -1053,9 +1053,9 @@@ static int axienet_open(struct net_devi * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); ret = axienet_device_reset(ndev); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp);
ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); if (ret) { @@@ -1148,9 -1148,9 +1148,9 @@@ static int axienet_stop(struct net_devi }
/* Do a reset to ensure DMA is really stopped */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); __axienet_device_reset(lp); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp);
cancel_work_sync(&lp->dma_err_task);
@@@ -1709,9 -1709,9 +1709,9 @@@ static void axienet_dma_err_handler(str * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ - mutex_lock(&lp->mii_bus->mdio_lock); + axienet_lock_mii(lp); __axienet_device_reset(lp); - mutex_unlock(&lp->mii_bus->mdio_lock); + axienet_unlock_mii(lp);
for (i = 0; i < lp->tx_bd_num; i++) { cur_p = &lp->tx_bd_v[i]; @@@ -1863,39 -1863,22 +1863,39 @@@ static int axienet_probe(struct platfor lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT;
- lp->clk = devm_clk_get_optional(&pdev->dev, NULL); - if (IS_ERR(lp->clk)) { - ret = PTR_ERR(lp->clk); + lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); + if (!lp->axi_clk) { + /* For backward compatibility, if named AXI clock is not present, + * treat the first clock specified as the AXI clock. + */ + lp->axi_clk = devm_clk_get_optional(&pdev->dev, NULL); + } + if (IS_ERR(lp->axi_clk)) { + ret = PTR_ERR(lp->axi_clk); goto free_netdev; } - ret = clk_prepare_enable(lp->clk); + ret = clk_prepare_enable(lp->axi_clk); if (ret) { - dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret); + dev_err(&pdev->dev, "Unable to enable AXI clock: %d\n", ret); goto free_netdev; }
+ lp->misc_clks[0].id = "axis_clk"; + lp->misc_clks[1].id = "ref_clk"; + lp->misc_clks[2].id = "mgt_clk"; + + ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS, lp->misc_clks); + if (ret) + goto cleanup_clk; + + ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + if (ret) + goto cleanup_clk; + /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); if (IS_ERR(lp->regs)) { - dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); ret = PTR_ERR(lp->regs); goto cleanup_clk; } @@@ -2126,8 -2109,7 +2126,8 @@@ cleanup_mdio of_node_put(lp->phy_node);
cleanup_clk: - clk_disable_unprepare(lp->clk); + clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + clk_disable_unprepare(lp->axi_clk);
free_netdev: free_netdev(ndev); @@@ -2150,8 -2132,7 +2150,8 @@@ static int axienet_remove(struct platfo
axienet_mdio_teardown(lp);
- clk_disable_unprepare(lp->clk); + clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + clk_disable_unprepare(lp->axi_clk);
of_node_put(lp->phy_node); lp->phy_node = NULL; diff --combined drivers/net/geneve.c index 5d7a2b1469f4,d5b1e48e0c09..e3b2375ac5eb --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@@ -461,7 -461,6 +461,7 @@@ static struct socket *geneve_create_soc if (err < 0) return ERR_PTR(err);
+ udp_allow_gso(sock->sk); return sock; }
@@@ -909,8 -908,16 +909,16 @@@ static int geneve_xmit_skb(struct sk_bu
info = skb_tunnel_info(skb); if (info) { - info->key.u.ipv4.dst = fl4.saddr; - info->key.u.ipv4.src = fl4.daddr; + struct ip_tunnel_info *unclone; + + unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) { + dst_release(&rt->dst); + return -ENOMEM; + } + + unclone->key.u.ipv4.dst = fl4.saddr; + unclone->key.u.ipv4.src = fl4.daddr; }
if (!pskb_may_pull(skb, ETH_HLEN)) { @@@ -994,8 -1001,16 +1002,16 @@@ static int geneve6_xmit_skb(struct sk_b struct ip_tunnel_info *info = skb_tunnel_info(skb);
if (info) { - info->key.u.ipv6.dst = fl6.saddr; - info->key.u.ipv6.src = fl6.daddr; + struct ip_tunnel_info *unclone; + + unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) { + dst_release(dst); + return -ENOMEM; + } + + unclone->key.u.ipv6.dst = fl6.saddr; + unclone->key.u.ipv6.src = fl6.daddr; }
if (!pskb_may_pull(skb, ETH_HLEN)) { diff --combined drivers/net/tun.c index 6e55697315de,4cf38be26dc9..36443d506b67 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@@ -69,6 -69,14 +69,14 @@@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/mutex.h> + #include <linux/ieee802154.h> + #include <linux/if_ltalk.h> + #include <uapi/linux/if_fddi.h> + #include <uapi/linux/if_hippi.h> + #include <uapi/linux/if_fc.h> + #include <net/ax25.h> + #include <net/rose.h> + #include <net/6lowpan.h>
#include <linux/uaccess.h> #include <linux/proc_fs.h> @@@ -1181,7 -1189,8 +1189,7 @@@ static int tun_xdp_xmit(struct net_devi struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile; u32 numqueues; - int drops = 0; - int cnt = n; + int nxmit = 0; int i;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@@ -1211,9 -1220,9 +1219,9 @@@ resample
if (__ptr_ring_produce(&tfile->tx_ring, frame)) { atomic_long_inc(&dev->tx_dropped); - xdp_return_frame_rx_napi(xdp); - drops++; + break; } + nxmit++; } spin_unlock(&tfile->tx_ring.producer_lock);
@@@ -1221,21 -1230,17 +1229,21 @@@ __tun_xdp_flush_tfile(tfile);
rcu_read_unlock(); - return cnt - drops; + return nxmit; }
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) { struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); + int nxmit;
if (unlikely(!frame)) return -EOVERFLOW;
- return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH); + nxmit = tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH); + if (!nxmit) + xdp_return_frame_rx_napi(frame); + return nxmit; }
static const struct net_device_ops tap_netdev_ops = { @@@ -2922,6 -2927,45 +2930,45 @@@ static int tun_set_ebpf(struct tun_stru return __tun_set_ebpf(tun, prog_p, prog); }
+ /* Return correct value for tun->dev->addr_len based on tun->dev->type. */ + static unsigned char tun_get_addr_len(unsigned short type) + { + switch (type) { + case ARPHRD_IP6GRE: + case ARPHRD_TUNNEL6: + return sizeof(struct in6_addr); + case ARPHRD_IPGRE: + case ARPHRD_TUNNEL: + case ARPHRD_SIT: + return 4; + case ARPHRD_ETHER: + return ETH_ALEN; + case ARPHRD_IEEE802154: + case ARPHRD_IEEE802154_MONITOR: + return IEEE802154_EXTENDED_ADDR_LEN; + case ARPHRD_PHONET_PIPE: + case ARPHRD_PPP: + case ARPHRD_NONE: + return 0; + case ARPHRD_6LOWPAN: + return EUI64_ADDR_LEN; + case ARPHRD_FDDI: + return FDDI_K_ALEN; + case ARPHRD_HIPPI: + return HIPPI_ALEN; + case ARPHRD_IEEE802: + return FC_ALEN; + case ARPHRD_ROSE: + return ROSE_ADDR_LEN; + case ARPHRD_NETROM: + return AX25_ADDR_LEN; + case ARPHRD_LOCALTLK: + return LTALK_ALEN; + default: + return 0; + } + } + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@@ -3085,6 -3129,7 +3132,7 @@@ break; } tun->dev->type = (int) arg; + tun->dev->addr_len = tun_get_addr_len(tun->dev->type); netif_info(tun, drv, tun->dev, "linktype set to %d\n", tun->dev->type); call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, diff --combined drivers/net/virtio_net.c index bb4ea9dbc16b,0824e6999e49..101659cd4b87 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@@ -195,9 -195,6 +195,9 @@@ struct virtnet_info /* # of XDP queue pairs currently used by the driver */ u16 xdp_queue_pairs;
+ /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ + bool xdp_enabled; + /* I like... big packets and I cannot lie! */ bool big_packets;
@@@ -409,9 -406,13 +409,13 @@@ static struct sk_buff *page_to_skb(stru offset += hdr_padded_len; p += hdr_padded_len;
- copy = len; - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); + /* Copy all frame if it fits skb->head, otherwise + * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. + */ + if (len <= skb_tailroom(skb)) + copy = len; + else + copy = ETH_HLEN + metasize; skb_put_data(skb, p, copy);
if (metasize) { @@@ -484,41 -485,12 +488,41 @@@ static int __virtnet_xdp_xmit_one(struc return 0; }
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi) -{ - unsigned int qp; - - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - return &vi->sq[qp]; +/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on + * the current cpu, so it does not need to be locked. + * + * Here we use marco instead of inline functions because we have to deal with + * three issues at the same time: 1. the choice of sq. 2. judge and execute the + * lock/unlock of txq 3. make sparse happy. It is difficult for two inline + * functions to perfectly solve these three problems at the same time. + */ +#define virtnet_xdp_get_sq(vi) ({ \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + unsigned int qp; \ + \ + if (v->curr_queue_pairs > nr_cpu_ids) { \ + qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ + qp += smp_processor_id(); \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_acquire(txq); \ + } else { \ + qp = smp_processor_id() % v->curr_queue_pairs; \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_lock(txq, raw_smp_processor_id()); \ + } \ + v->sq + qp; \ +}) + +#define virtnet_xdp_put_sq(vi, q) { \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + \ + txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ + if (v->curr_queue_pairs > nr_cpu_ids) \ + __netif_tx_release(txq); \ + else \ + __netif_tx_unlock(txq); \ }
static int virtnet_xdp_xmit(struct net_device *dev, @@@ -531,10 -503,10 +535,10 @@@ unsigned int len; int packets = 0; int bytes = 0; - int drops = 0; + int nxmit = 0; int kicks = 0; - int ret, err; void *ptr; + int ret; int i;
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this @@@ -544,10 -516,11 +548,10 @@@ if (!xdp_prog) return -ENXIO;
- sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ret = -EINVAL; - drops = n; goto out; }
@@@ -570,11 -543,13 +574,11 @@@ for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i];
- err = __virtnet_xdp_xmit_one(vi, sq, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) + break; + nxmit++; } - ret = n - drops; + ret = nxmit;
if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) @@@ -585,17 -560,16 +589,17 @@@ out sq->stats.bytes += bytes; sq->stats.packets += packets; sq->stats.xdp_tx += n; - sq->stats.xdp_tx_drops += drops; + sq->stats.xdp_tx_drops += n - nxmit; sq->stats.kicks += kicks; u64_stats_update_end(&sq->stats.syncp);
+ virtnet_xdp_put_sq(vi, sq); return ret; }
static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { - return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; + return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; }
/* We copy the packet for XDP in the following cases: @@@ -739,9 -713,7 +743,9 @@@ static struct sk_buff *receive_small(st if (unlikely(!xdpf)) goto err_xdp; err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); - if (unlikely(err < 0)) { + if (unlikely(!err)) { + xdp_return_frame_rx_napi(xdpf); + } else if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; } @@@ -928,9 -900,7 +932,9 @@@ static struct sk_buff *receive_mergeabl if (unlikely(!xdpf)) goto err_xdp; err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); - if (unlikely(err < 0)) { + if (unlikely(!err)) { + xdp_return_frame_rx_napi(xdpf); + } else if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) put_page(xdp_page); @@@ -1492,13 -1462,12 +1496,13 @@@ static int virtnet_poll(struct napi_str xdp_do_flush();
if (xdp_xmit & VIRTIO_XDP_TX) { - sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi); if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); sq->stats.kicks++; u64_stats_update_end(&sq->stats.syncp); } + virtnet_xdp_put_sq(vi, sq); }
return received; @@@ -2016,7 -1985,7 +2020,7 @@@ static void virtnet_set_affinity(struc } virtqueue_set_affinity(vi->rq[i].vq, mask); virtqueue_set_affinity(vi->sq[i].vq, mask); - __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false); + __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); cpumask_clear(mask); }
@@@ -2139,21 -2108,25 +2143,21 @@@ static int virtnet_set_channels(struct static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct virtnet_info *vi = netdev_priv(dev); - char *p = (char *)data; unsigned int i, j; + u8 *p = data;
switch (stringset) { case ETH_SS_STATS: for (i = 0; i < vi->curr_queue_pairs; i++) { - for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s", - i, virtnet_rq_stats_desc[j].desc); - p += ETH_GSTRING_LEN; - } + for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) + ethtool_sprintf(&p, "rx_queue_%u_%s", i, + virtnet_rq_stats_desc[j].desc); }
for (i = 0; i < vi->curr_queue_pairs; i++) { - for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s", - i, virtnet_sq_stats_desc[j].desc); - p += ETH_GSTRING_LEN; - } + for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) + ethtool_sprintf(&p, "tx_queue_%u_%s", i, + virtnet_sq_stats_desc[j].desc); } break; } @@@ -2449,9 -2422,10 +2453,9 @@@ static int virtnet_xdp_set(struct net_d
/* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { - NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available"); - netdev_warn(dev, "request %i queues but max is %i\n", + netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", curr_qp + xdp_qp, vi->max_queue_pairs); - return -ENOMEM; + xdp_qp = 0; }
old_prog = rtnl_dereference(vi->rq[0].xdp_prog); @@@ -2485,14 -2459,11 +2489,14 @@@ vi->xdp_queue_pairs = xdp_qp;
if (prog) { + vi->xdp_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + } else { + vi->xdp_enabled = false; }
for (i = 0; i < vi->max_queue_pairs; i++) { @@@ -2560,7 -2531,7 +2564,7 @@@ static int virtnet_set_features(struct int err;
if ((dev->features ^ features) & NETIF_F_LRO) { - if (vi->xdp_queue_pairs) + if (vi->xdp_enabled) return -EBUSY;
if (features & NETIF_F_LRO) @@@ -3006,8 -2977,7 +3010,8 @@@ static int virtnet_probe(struct virtio_ return -ENOMEM;
/* Set up network device as normal. */ - dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; + dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | + IFF_TX_SKB_NO_LINEAR; dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA;
diff --combined drivers/net/vxlan.c index 39ee1300cdd9,53dbc67e8a34..02a14f1b938a --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@@ -2725,12 -2725,17 +2725,17 @@@ static void vxlan_xmit_one(struct sk_bu goto tx_error; } else if (err) { if (info) { + struct ip_tunnel_info *unclone; struct in_addr src, dst;
+ unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) + goto tx_error; + src = remote_ip.sin.sin_addr; dst = local_ip.sin.sin_addr; - info->key.u.ipv4.src = src.s_addr; - info->key.u.ipv4.dst = dst.s_addr; + unclone->key.u.ipv4.src = src.s_addr; + unclone->key.u.ipv4.dst = dst.s_addr; } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); dst_release(ndst); @@@ -2781,12 -2786,17 +2786,17 @@@ goto tx_error; } else if (err) { if (info) { + struct ip_tunnel_info *unclone; struct in6_addr src, dst;
+ unclone = skb_tunnel_info_unclone(skb); + if (unlikely(!unclone)) + goto tx_error; + src = remote_ip.sin6.sin6_addr; dst = local_ip.sin6.sin6_addr; - info->key.u.ipv6.src = src; - info->key.u.ipv6.dst = dst; + unclone->key.u.ipv6.src = src; + unclone->key.u.ipv6.dst = dst; }
vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); @@@ -3484,7 -3494,6 +3494,7 @@@ static struct socket *vxlan_create_sock if (err < 0) return ERR_PTR(err);
+ udp_allow_gso(sock->sk); return sock; }
@@@ -3704,7 -3713,6 +3714,7 @@@ static int vxlan_config_validate(struc #if IS_ENABLED(CONFIG_IPV6) if (use_ipv6) { struct inet6_dev *idev = __in6_dev_get(lowerdev); + if (idev && idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 support disabled by administrator"); diff --combined include/linux/avf/virtchnl.h index 47482049f640,532bcbfc4716..40dd6afbfd81 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@@ -136,9 -136,6 +136,9 @@@ enum virtchnl_ops VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, + /* opcode 34 - 46 are reserved */ + VIRTCHNL_OP_ADD_FDIR_FILTER = 47, + VIRTCHNL_OP_DEL_FDIR_FILTER = 48, };
/* These macros are used to generate compilation errors if a structure/union @@@ -250,7 -247,6 +250,7 @@@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_ #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 +#define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000
/* Define below the capability flags that are not offloads */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 @@@ -480,7 -476,6 +480,6 @@@ struct virtchnl_rss_key u16 vsi_id; u16 key_len; u8 key[1]; /* RSS hash key, packed bytes */ - u8 pad[1]; };
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); @@@ -489,7 -484,6 +488,6 @@@ struct virtchnl_rss_lut u16 vsi_id; u16 lut_entries; u8 lut[1]; /* RSS lookup table */ - u8 pad[1]; };
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); @@@ -563,11 -557,6 +561,11 @@@ enum virtchnl_action /* action types */ VIRTCHNL_ACTION_DROP = 0, VIRTCHNL_ACTION_TC_REDIRECT, + VIRTCHNL_ACTION_PASSTHRU, + VIRTCHNL_ACTION_QUEUE, + VIRTCHNL_ACTION_Q_REGION, + VIRTCHNL_ACTION_MARK, + VIRTCHNL_ACTION_COUNT, };
enum virtchnl_flow_type { @@@ -677,269 -666,6 +675,269 @@@ enum virtchnl_vfr_states VIRTCHNL_VFR_VFACTIVE, };
+#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 +#define PROTO_HDR_SHIFT 5 +#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) +#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) + +/* VF use these macros to configure each protocol header. + * Specify which protocol headers and protocol header fields base on + * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field. + * @param hdr: a struct of virtchnl_proto_hdr + * @param hdr_type: ETH/IPV4/TCP, etc + * @param field: SRC/DST/TEID/SPI, etc + */ +#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \ + ((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr) ((hdr)->field_selector) + +#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) +#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) + +#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \ + ((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type) +#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \ + (((hdr)->type) >> PROTO_HDR_SHIFT) +#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \ + ((hdr)->type == ((val) >> PROTO_HDR_SHIFT)) +#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \ + (VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \ + VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val))) + +/* Protocol header type within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization. + */ +enum virtchnl_proto_hdr_type { + VIRTCHNL_PROTO_HDR_NONE, + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_S_VLAN, + VIRTCHNL_PROTO_HDR_C_VLAN, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_TCP, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_SCTP, + VIRTCHNL_PROTO_HDR_GTPU_IP, + VIRTCHNL_PROTO_HDR_GTPU_EH, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + VIRTCHNL_PROTO_HDR_PPPOE, + VIRTCHNL_PROTO_HDR_L2TPV3, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_AH, + VIRTCHNL_PROTO_HDR_PFCP, +}; + +/* Protocol header field within a protocol header. */ +enum virtchnl_proto_hdr_field { + /* ETHER */ + VIRTCHNL_PROTO_HDR_ETH_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH), + VIRTCHNL_PROTO_HDR_ETH_DST, + VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, + /* S-VLAN */ + VIRTCHNL_PROTO_HDR_S_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN), + /* C-VLAN */ + VIRTCHNL_PROTO_HDR_C_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN), + /* IPV4 */ + VIRTCHNL_PROTO_HDR_IPV4_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4), + VIRTCHNL_PROTO_HDR_IPV4_DST, + VIRTCHNL_PROTO_HDR_IPV4_DSCP, + VIRTCHNL_PROTO_HDR_IPV4_TTL, + VIRTCHNL_PROTO_HDR_IPV4_PROT, + /* IPV6 */ + VIRTCHNL_PROTO_HDR_IPV6_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6), + VIRTCHNL_PROTO_HDR_IPV6_DST, + VIRTCHNL_PROTO_HDR_IPV6_TC, + VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, + VIRTCHNL_PROTO_HDR_IPV6_PROT, + /* TCP */ + VIRTCHNL_PROTO_HDR_TCP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP), + VIRTCHNL_PROTO_HDR_TCP_DST_PORT, + /* UDP */ + VIRTCHNL_PROTO_HDR_UDP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP), + VIRTCHNL_PROTO_HDR_UDP_DST_PORT, + /* SCTP */ + VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP), + VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, + /* GTPU_IP */ + VIRTCHNL_PROTO_HDR_GTPU_IP_TEID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP), + /* GTPU_EH */ + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH), + VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, + /* PPPOE */ + VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE), + /* L2TPV3 */ + VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3), + /* ESP */ + VIRTCHNL_PROTO_HDR_ESP_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP), + /* AH */ + VIRTCHNL_PROTO_HDR_AH_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH), + /* PFCP */ + VIRTCHNL_PROTO_HDR_PFCP_S_FIELD = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP), + VIRTCHNL_PROTO_HDR_PFCP_SEID, +}; + +struct virtchnl_proto_hdr { + enum virtchnl_proto_hdr_type type; + u32 field_selector; /* a bit mask to select field for header type */ + u8 buffer[64]; + /** + * binary buffer in network order for specific header type. + * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4 + * header is expected to be copied into the buffer. + */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr); + +struct virtchnl_proto_hdrs { + u8 tunnel_level; + /** + * specify where protocol header start from. + * 0 - from the outer layer + * 1 - from the first inner layer + * 2 - from the second inner layer + * .... + **/ + int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); + +/* action configuration for FDIR */ +struct virtchnl_filter_action { + enum virtchnl_action type; + union { + /* used for queue and qgroup action */ + struct { + u16 index; + u8 region; + } queue; + /* used for count action */ + struct { + /* share counter ID with other flow rules */ + u8 shared; + u32 id; /* counter ID */ + } count; + /* used for mark action */ + u32 mark_id; + u8 reserve[32]; + } act_conf; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); + +#define VIRTCHNL_MAX_NUM_ACTIONS 8 + +struct virtchnl_filter_action_set { + /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ + int count; + struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set); + +/* pattern and action for FDIR rule */ +struct virtchnl_fdir_rule { + struct virtchnl_proto_hdrs proto_hdrs; + struct virtchnl_filter_action_set action_set; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule); + +/* Status returned to VF after VF requests FDIR commands + * VIRTCHNL_FDIR_SUCCESS + * VF FDIR related request is successfully done by PF + * The request can be OP_ADD/DEL. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE + * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource. + * + * VIRTCHNL_FDIR_FAILURE_RULE_EXIST + * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed. + * + * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT + * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST + * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist. + * + * VIRTCHNL_FDIR_FAILURE_RULE_INVALID + * OP_ADD_FDIR_FILTER request is failed due to parameters validation + * or HW doesn't support. + * + * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT + * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out + * for programming. + */ +enum virtchnl_fdir_prgm_status { + VIRTCHNL_FDIR_SUCCESS = 0, + VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE, + VIRTCHNL_FDIR_FAILURE_RULE_EXIST, + VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT, + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST, + VIRTCHNL_FDIR_FAILURE_RULE_INVALID, + VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT, +}; + +/* VIRTCHNL_OP_ADD_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id, + * validate_only and rule_cfg. PF will return flow_id + * if the request is successfully done and return add_status to VF. + */ +struct virtchnl_fdir_add { + u16 vsi_id; /* INPUT */ + /* + * 1 for validating a fdir rule, 0 for creating a fdir rule. + * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER. + */ + u16 validate_only; /* INPUT */ + u32 flow_id; /* OUTPUT */ + struct virtchnl_fdir_rule rule_cfg; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add); + +/* VIRTCHNL_OP_DEL_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id + * and flow_id. PF will return del_status to VF. + */ +struct virtchnl_fdir_del { + u16 vsi_id; /* INPUT */ + u16 pad; + u32 flow_id; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); + /** * virtchnl_vc_validate_vf_msg * @ver: Virtchnl version info @@@ -1100,12 -826,6 +1098,12 @@@ virtchnl_vc_validate_vf_msg(struct virt case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_add); + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_del); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --combined include/linux/bpf.h index 9fdd839b418c,fdac0534ce79..c9b7a876b0c8 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -40,7 -40,7 +40,8 @@@ struct bpf_local_storage struct bpf_local_storage_map; struct kobject; struct mem_cgroup; + struct module; +struct bpf_func_state;
extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@@ -56,7 -56,7 +57,7 @@@ struct bpf_iter_seq_info u32 seq_priv_size; };
-/* map is generic key/value storage optionally accesible by eBPF programs */ +/* map is generic key/value storage optionally accessible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ int (*map_alloc_check)(union bpf_attr *attr); @@@ -119,9 -119,6 +120,9 @@@ void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+ /* Misc helpers.*/ + int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. @@@ -134,13 -131,6 +135,13 @@@ bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1);
+ + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@@ -307,8 -297,6 +308,8 @@@ enum bpf_arg_type ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, };
@@@ -425,9 -413,6 +426,9 @@@ enum bpf_reg_type PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ + __BPF_REG_TYPE_MAX, };
/* The information passed from prog-specific *_is_valid_access @@@ -481,7 -466,6 +482,7 @@@ struct bpf_verifier_ops const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); + bool (*check_kfunc_call)(u32 kfunc_btf_id); };
struct bpf_prog_offload_ops { @@@ -524,11 -508,6 +525,11 @@@ enum bpf_cgroup_storage_type */ #define MAX_BPF_FUNC_ARGS 12
+/* The maximum number of arguments passed through registers + * a single function may have. + */ +#define MAX_BPF_FUNC_REG_ARGS 5 + struct btf_func_model { u8 ret_size; u8 nr_args; @@@ -645,6 -624,7 +646,7 @@@ struct bpf_trampoline /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; u64 selector; + struct module *mod; };
struct bpf_attach_target_info { @@@ -798,8 -778,6 +800,8 @@@ struct btf_mod_pair struct module *module; };
+struct bpf_kfunc_desc_tab; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@@ -836,7 -814,6 +838,7 @@@ struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; + struct bpf_kfunc_desc_tab *kfunc_tab; u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; @@@ -1111,13 -1088,6 +1113,13 @@@ int bpf_prog_array_copy(struct bpf_prog /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0)
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, + * if bpf_cgroup_storage_set() failed, the rest of programs + * will not execute. This should be a really rare scenario + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of + * preemptions all between bpf_cgroup_storage_set() and + * bpf_cgroup_storage_unset() on the same cpu. + */ #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ ({ \ struct bpf_prog_array_item *_item; \ @@@ -1130,12 -1100,10 +1132,12 @@@ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ func_ret = func(_prog, ctx); \ _ret &= (func_ret & 1); \ *(ret_flags) |= (func_ret >> 1); \ + bpf_cgroup_storage_unset(); \ _item++; \ } \ rcu_read_unlock(); \ @@@ -1156,14 -1124,9 +1158,14 @@@ goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - if (set_cg_storage) \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ - _ret &= func(_prog, ctx); \ + if (!set_cg_storage) { \ + _ret &= func(_prog, ctx); \ + } else { \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ + _ret &= func(_prog, ctx); \ + bpf_cgroup_storage_unset(); \ + } \ _item++; \ } \ _out: \ @@@ -1436,10 -1399,6 +1438,10 @@@ void bpf_iter_map_show_fdinfo(const str int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info);
+int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@@ -1489,9 -1448,9 +1491,9 @@@ struct btf *bpf_get_btf_vmlinux(void) /* Map specifics */ struct xdp_buff; struct sk_buff; +struct bpf_dtab_netdev; +struct bpf_cpu_map_entry;
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_flush(void); int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1501,6 -1460,7 +1503,6 @@@ int dev_map_generic_redirect(struct bpf struct bpf_prog *xdp_prog); bool dev_map_can_have_prog(struct bpf_map *map);
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1529,10 -1489,6 +1531,10 @@@ int bpf_prog_test_run_flow_dissector(st int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@@ -1551,11 -1507,8 +1553,11 @@@ int btf_distill_func_proto(struct bpf_v struct btf_func_model *m);
struct bpf_reg_state; -int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); +int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); +int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@@ -1565,11 -1518,6 +1567,11 @@@ struct bpf_prog *bpf_prog_by_id(u32 id) struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +void bpf_task_storage_free(struct task_struct *task); +bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); +const struct btf_func_model * +bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@@ -1639,6 -1587,17 +1641,6 @@@ static inline int bpf_obj_get_user(cons return -EOPNOTSUPP; }
-static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} - -static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} static inline bool dev_map_can_have_prog(struct bpf_map *map) { return false; @@@ -1650,7 -1609,6 +1652,7 @@@ static inline void __dev_flush(void
struct xdp_buff; struct bpf_dtab_netdev; +struct bpf_cpu_map_entry;
static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, @@@ -1675,6 -1633,12 +1677,6 @@@ static inline int dev_map_generic_redir return 0; }
-static inline -struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) -{ - return NULL; -} - static inline void __cpu_map_flush(void) { } @@@ -1725,18 -1689,6 +1727,18 @@@ static inline int bpf_prog_test_run_flo return -ENOTSUPP; }
+static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + +static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +{ + return false; +} + static inline void bpf_map_put(struct bpf_map *map) { } @@@ -1751,22 -1703,6 +1753,22 @@@ bpf_base_func_proto(enum bpf_func_id fu { return NULL; } + +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} + +static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) +{ + return false; +} + +static inline const struct btf_func_model * +bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn) +{ + return NULL; +} #endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@@ -1851,24 -1787,22 +1853,24 @@@ static inline void bpf_map_offload_map_ } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-#if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); + +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); #else -static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, - struct bpf_prog *old, u32 which) +static inline void bpf_sk_reuseport_detach(struct sock *sk) { - return -EOPNOTSUPP; }
+#ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { @@@ -1886,7 -1820,20 +1888,7 @@@ static inline int sock_map_update_elem_ { return -EOPNOTSUPP; } -#endif /* CONFIG_BPF_STREAM_PARSER */
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -void bpf_sk_reuseport_detach(struct sock *sk); -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); -#else -static inline void bpf_sk_reuseport_detach(struct sock *sk) -{ -} - -#ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@@ -1958,9 -1905,6 +1960,9 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; +extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --combined include/linux/ethtool.h index 5c631a298994,cdca84e6dd6b..4290e2fa3117 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@@ -87,9 -87,7 +87,7 @@@ u32 ethtool_op_get_link(struct net_devi int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti);
- /** - * struct ethtool_link_ext_state_info - link extended state and substate. - */ + /* Link extended state and substate. */ struct ethtool_link_ext_state_info { enum ethtool_link_ext_state link_ext_state; union { @@@ -129,7 -127,6 +127,6 @@@ struct ethtool_link_ksettings __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; u32 lanes; - enum ethtool_link_mode_bit_indices link_mode; };
/** @@@ -292,6 -289,9 +289,9 @@@ struct ethtool_pause_stats * do not attach ext_substate attribute to netlink message). If link_ext_state * and link_ext_substate are unknown, return -ENODATA. If not implemented, * link_ext_state and link_ext_substate will not be sent to userspace. + * @get_eeprom_len: Read range of EEPROM addresses for validation of + * @get_eeprom and @set_eeprom requests. + * Returns 0 if device does not support EEPROM access. * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values @@@ -384,6 -384,8 +384,8 @@@ * @get_module_eeprom: Get the eeprom information from the plug-in module * @get_eee: Get Energy-Efficient (EEE) supported and status. * @set_eee: Set EEE status (enable/disable) as well as LPI timers. + * @get_tunable: Read the value of a driver / device tunable. + * @set_tunable: Set the value of a driver / device tunable. * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue. * It must check that the given queue number is valid. If neither a RX nor * a TX queue has this number, return -EINVAL. If only a RX queue or a TX @@@ -410,8 -412,6 +412,8 @@@ * @get_ethtool_phy_stats: Return extended statistics about the PHY device. * This is only useful if the device maintains PHY statistics and * cannot use the standard PHY library helpers. + * @get_phy_tunable: Read the value of a PHY tunable. + * @set_phy_tunable: Set the value of a PHY tunable. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@@ -549,8 -549,8 +551,8 @@@ struct phy_tdr_config * @get_sset_count: Get number of strings that @get_strings will write. * @get_strings: Return a set of strings that describe the requested objects * @get_stats: Return extended statistics about the PHY device. - * @start_cable_test - Start a cable test - * @start_cable_test_tdr - Start a Time Domain Reflectometry cable test + * @start_cable_test: Start a cable test + * @start_cable_test_tdr: Start a Time Domain Reflectometry cable test * * All operations are optional (i.e. the function pointer may be set to %NULL) * and callers must take this into account. Callers must hold the RTNL lock. @@@ -573,13 -573,12 +575,22 @@@ struct ethtool_phy_ops */ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
-/* ++/** + * ethtool_params_from_link_mode - Derive link parameters from a given link mode + * @link_ksettings: Link parameters to be derived from the link mode + * @link_mode: Link mode + */ + void + ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, + enum ethtool_link_mode_bit_indices link_mode); ++ +/** + * ethtool_sprintf - Write formatted string to ethtool string data + * @data: Pointer to start of string to update + * @fmt: Format of string to write + * + * Write formatted string to data. Update data to point at start of + * next string. + */ +extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); #endif /* _LINUX_ETHTOOL_H */ diff --combined include/linux/mlx5/mlx5_ifc.h index 432290b58a0b,9c68b2da14c6..1599deee0456 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@@ -437,11 -437,11 +437,11 @@@ struct mlx5_ifc_flow_table_prop_layout_ u8 reserved_at_60[0x18]; u8 log_max_ft_num[0x8];
- u8 reserved_at_80[0x18]; + u8 reserved_at_80[0x10]; + u8 log_max_flow_counter[0x8]; u8 log_max_destination[0x8];
- u8 log_max_flow_counter[0x8]; - u8 reserved_at_a8[0x10]; + u8 reserved_at_a0[0x18]; u8 log_max_flow[0x8];
u8 reserved_at_c0[0x40]; @@@ -806,11 -806,9 +806,11 @@@ struct mlx5_ifc_e_switch_cap_bits u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x3]; + u8 reserved_at_5[0x2]; + u8 esw_shared_ingress_acl[0x1]; u8 esw_uplink_ingress_acl[0x1]; - u8 reserved_at_9[0x10]; + u8 root_ft_on_other_esw[0x1]; + u8 reserved_at_a[0xf]; u8 esw_functions_changed[0x1]; u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; @@@ -1504,8 -1502,7 +1504,8 @@@ struct mlx5_ifc_cmd_hca_cap_bits u8 reserved_at_270[0x6]; u8 lag_dct[0x2]; u8 lag_tx_port_affinity[0x1]; - u8 reserved_at_279[0x2]; + u8 lag_native_fdb_selection[0x1]; + u8 reserved_at_27a[0x1]; u8 lag_master[0x1]; u8 num_lag_ports[0x4];
@@@ -1683,16 -1680,7 +1683,16 @@@ u8 reserved_at_6e0[0x10]; u8 sf_base_id[0x10];
- u8 reserved_at_700[0x80]; + u8 reserved_at_700[0x8]; + u8 num_total_dynamic_vf_msix[0x18]; + u8 reserved_at_720[0x14]; + u8 dynamic_msix_table_size[0xc]; + u8 reserved_at_740[0xc]; + u8 min_dynamic_vf_msix_table_size[0x4]; + u8 reserved_at_750[0x4]; + u8 max_dynamic_vf_msix_table_size[0xc]; + + u8 reserved_at_760[0x20]; u8 vhca_tunnel_commands[0x40]; u8 reserved_at_7c0[0x40]; }; @@@ -8847,6 -8835,8 +8847,8 @@@ struct mlx5_ifc_pplm_reg_bits
u8 fec_override_admin_100g_2x[0x10]; u8 fec_override_admin_50g_1x[0x10]; + + u8 reserved_at_140[0x140]; };
struct mlx5_ifc_ppcnt_reg_bits { @@@ -10048,19 -10038,14 +10050,19 @@@ struct mlx5_ifc_set_flow_table_root_in_ u8 reserved_at_60[0x20];
u8 table_type[0x8]; - u8 reserved_at_88[0x18]; + u8 reserved_at_88[0x7]; + u8 table_of_other_vport[0x1]; + u8 table_vport_number[0x10];
u8 reserved_at_a0[0x8]; u8 table_id[0x18];
u8 reserved_at_c0[0x8]; u8 underlay_qpn[0x18]; - u8 reserved_at_e0[0x120]; + u8 table_eswitch_owner_vhca_id_valid[0x1]; + u8 reserved_at_e1[0xf]; + u8 table_eswitch_owner_vhca_id[0x10]; + u8 reserved_at_100[0x100]; };
enum { @@@ -10215,7 -10200,7 +10217,7 @@@ struct mlx5_ifc_pbmc_reg_bits
struct mlx5_ifc_bufferx_reg_bits buffer[10];
- u8 reserved_at_2e0[0x40]; + u8 reserved_at_2e0[0x80]; };
struct mlx5_ifc_qtct_reg_bits { @@@ -10290,8 -10275,7 +10292,8 @@@ struct mlx5_ifc_dcbx_param_bits };
struct mlx5_ifc_lagc_bits { - u8 reserved_at_0[0x1d]; + u8 fdb_selection_mode[0x1]; + u8 reserved_at_1[0x1c]; u8 lag_state[0x3];
u8 reserved_at_20[0x14]; diff --combined include/linux/skmsg.h index f78e90a04a69,822c048934e3..e242bf3d2b4a --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@@ -56,8 -56,7 +56,8 @@@ struct sk_msg
struct sk_psock_progs { struct bpf_prog *msg_parser; - struct bpf_prog *skb_parser; + struct bpf_prog *stream_parser; + struct bpf_prog *stream_verdict; struct bpf_prog *skb_verdict; };
@@@ -71,6 -70,12 +71,6 @@@ struct sk_psock_link void *link_raw; };
-struct sk_psock_parser { - struct strparser strp; - bool enabled; - void (*saved_data_ready)(struct sock *sk); -}; - struct sk_psock_work_state { struct sk_buff *skb; u32 len; @@@ -85,12 -90,9 +85,12 @@@ struct sk_psock u32 eval; struct sk_msg *cork; struct sk_psock_progs progs; - struct sk_psock_parser parser; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + struct strparser strp; +#endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; + spinlock_t ingress_lock; unsigned long state; struct list_head link; spinlock_t link_lock; @@@ -98,13 -100,13 +98,13 @@@ void (*saved_unhash)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); + void (*saved_data_ready)(struct sock *sk); + int (*psock_update_sk_prot)(struct sock *sk, bool restore); struct proto *sk_proto; + struct mutex work_mutex; struct sk_psock_work_state work_state; struct work_struct work; - union { - struct rcu_head rcu; - struct work_struct gc; - }; + struct rcu_work rwork; };
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, @@@ -125,10 -127,6 +125,10 @@@ int sk_msg_zerocopy_from_iter(struct so struct sk_msg *msg, u32 bytes); int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); +int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, + long timeo, int *err); +int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + int len, int flags);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { @@@ -289,45 -287,7 +289,45 @@@ static inline struct sk_psock *sk_psock static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { + spin_lock_bh(&psock->ingress_lock); list_add_tail(&msg->list, &psock->ingress_msg); + spin_unlock_bh(&psock->ingress_lock); +} + +static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) +{ + struct sk_msg *msg; + + spin_lock_bh(&psock->ingress_lock); + msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); + if (msg) + list_del(&msg->list); + spin_unlock_bh(&psock->ingress_lock); + return msg; +} + +static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock) +{ + struct sk_msg *msg; + + spin_lock_bh(&psock->ingress_lock); + msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); + spin_unlock_bh(&psock->ingress_lock); + return msg; +} + +static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock, + struct sk_msg *msg) +{ + struct sk_msg *ret; + + spin_lock_bh(&psock->ingress_lock); + if (list_is_last(&msg->list, &psock->ingress_msg)) + ret = NULL; + else + ret = list_next_entry(msg, list); + spin_unlock_bh(&psock->ingress_lock); + return ret; }
static inline bool sk_psock_queue_empty(const struct sk_psock *psock) @@@ -335,13 -295,6 +335,13 @@@ return psock ? list_empty(&psock->ingress_msg) : true; }
+static inline void kfree_sk_msg(struct sk_msg *msg) +{ + if (msg->skb) + consume_skb(msg->skb); + kfree(msg); +} + static inline void sk_psock_report_error(struct sk_psock *psock, int err) { struct sock *sk = psock->sk; @@@ -351,27 -304,10 +351,27 @@@ }
struct sk_psock *sk_psock_init(struct sock *sk, int node); +void sk_psock_stop(struct sk_psock *psock, bool wait);
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); +#else +static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) +{ + return -EOPNOTSUPP; +} + +static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) +{ +} + +static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) +{ +} +#endif + void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
@@@ -391,6 -327,8 +391,6 @@@ static inline void sk_psock_free_link(s
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock); - static inline void sk_psock_cork_free(struct sk_psock *psock) { if (psock->cork) { @@@ -400,12 -338,30 +400,11 @@@ } }
-static inline void sk_psock_update_proto(struct sock *sk, - struct sk_psock *psock, - struct proto *ops) -{ - /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, ops); -} - static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { - sk->sk_prot->unhash = psock->saved_unhash; - if (inet_csk_has_ulp(sk)) { - /* TLS does not have an unhash proto in SW cases, but we need - * to ensure we stop using the sock_map unhash routine because - * the associated psock is being removed. So use the original - * unhash handler. - */ - WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); - } else { - sk->sk_write_space = psock->saved_write_space; - /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, psock->sk_proto); - } + if (psock->psock_update_sk_prot) + psock->psock_update_sk_prot(sk, true); }
static inline void sk_psock_set_state(struct sk_psock *psock, @@@ -438,6 -394,7 +437,6 @@@ static inline struct sk_psock *sk_psock return psock; }
-void sk_psock_stop(struct sock *sk, struct sk_psock *psock); void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) @@@ -448,8 -405,8 +447,8 @@@
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { - if (psock->parser.enabled) - psock->parser.saved_data_ready(sk); + if (psock->saved_data_ready) + psock->saved_data_ready(sk); else sk->sk_data_ready(sk); } @@@ -478,8 -435,7 +477,8 @@@ static inline int psock_replace_prog(st static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); - psock_set_prog(&progs->skb_parser, NULL); + psock_set_prog(&progs->stream_parser, NULL); + psock_set_prog(&progs->stream_verdict, NULL); psock_set_prog(&progs->skb_verdict, NULL); }
@@@ -489,44 -445,6 +488,44 @@@ static inline bool sk_psock_strp_enable { if (!psock) return false; - return psock->parser.enabled; + return !!psock->saved_data_ready; +} + +#if IS_ENABLED(CONFIG_NET_SOCK_MSG) + +/* We only have one bit so far. */ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) + +static inline bool skb_bpf_ingress(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_INGRESS; +} + +static inline void skb_bpf_set_ingress(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_INGRESS; +} + +static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir, + bool ingress) +{ + skb->_sk_redir = (unsigned long)sk_redir; + if (ingress) + skb->_sk_redir |= BPF_F_INGRESS; +} + +static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return (struct sock *)(sk_redir & BPF_F_PTR_MASK); +} + +static inline void skb_bpf_redirect_clear(struct sk_buff *skb) +{ + skb->_sk_redir = 0; } +#endif /* CONFIG_NET_SOCK_MSG */ #endif /* _LINUX_SKMSG_H */ diff --combined include/net/sock.h index 8b4155e756c2,8487f58da36d..cadcc12cc316 --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -934,9 -934,13 +934,13 @@@ static inline void sk_acceptq_added(str WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); }
+ /* Note: If you think the test should be: + * return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog); + * Then please take a look at commit 64a146513f8f ("[NET]: Revert incorrect accept queue backlog changes.") + */ static inline bool sk_acceptq_is_full(const struct sock *sk) { - return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog); + return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); }
/* @@@ -1184,9 -1188,6 +1188,9 @@@ struct proto void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); +#ifdef CONFIG_BPF_SYSCALL + int (*psock_update_sk_prot)(struct sock *sk, bool restore); +#endif
/* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@@ -2224,6 -2225,15 +2228,15 @@@ static inline void skb_set_owner_r(stru sk_mem_charge(sk, skb->truesize); }
+ static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) + { + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { + skb_orphan(skb); + skb->destructor = sock_efree; + skb->sk = sk; + } + } + void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires);
diff --combined include/uapi/linux/ethtool.h index 868b513d4f54,5afea692a3f7..f91e079e3108 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@@ -26,6 -26,14 +26,14 @@@ * have the same layout for 32-bit and 64-bit userland. */
+ /* Note on reserved space. + * Reserved fields must not be accessed directly by user space because + * they may be replaced by a different field in the future. They must + * be initialized to zero before making the request, e.g. via memset + * of the entire structure or implicitly by not being set in a structure + * initializer. + */ + /** * struct ethtool_cmd - DEPRECATED, link control and status * This structure is DEPRECATED, please use struct ethtool_link_settings. @@@ -67,6 -75,7 +75,7 @@@ * and other link features that the link partner advertised * through autonegotiation; 0 if unknown or not applicable. * Read-only. + * @reserved: Reserved for future use; see the note on reserved space. * * The link speed in Mbps is split between @speed and @speed_hi. Use * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to @@@ -155,6 -164,7 +164,7 @@@ static inline __u32 ethtool_cmd_speed(c * @bus_info: Device bus address. This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. + * @reserved2: Reserved for future use; see the note on reserved space. * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and * %ETHTOOL_SPFLAGS commands; also the number of strings in the * %ETH_SS_PRIV_FLAGS set @@@ -356,6 -366,7 +366,7 @@@ struct ethtool_eeprom * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting * its tx lpi (after reaching 'idle' state). Effective only when eee * was negotiated and tx_lpi_enabled was set. + * @reserved: Reserved for future use; see the note on reserved space. */ struct ethtool_eee { __u32 cmd; @@@ -374,6 -385,7 +385,7 @@@ * @cmd: %ETHTOOL_GMODULEINFO * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx * @eeprom_len: Length of the eeprom + * @reserved: Reserved for future use; see the note on reserved space. * * This structure is used to return the information to * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. @@@ -579,9 -591,7 +591,7 @@@ struct ethtool_pauseparam __u32 tx_pause; };
- /** - * enum ethtool_link_ext_state - link extended state - */ + /* Link extended state */ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_AUTONEG, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, @@@ -595,10 -605,7 +605,7 @@@ ETHTOOL_LINK_EXT_STATE_OVERHEAT, };
- /** - * enum ethtool_link_ext_substate_autoneg - more information in addition to - * ETHTOOL_LINK_EXT_STATE_AUTONEG. - */ + /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ enum ethtool_link_ext_substate_autoneg { ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, @@@ -608,9 -615,7 +615,7 @@@ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, };
- /** - * enum ethtool_link_ext_substate_link_training - more information in addition to - * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. + /* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. */ enum ethtool_link_ext_substate_link_training { ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, @@@ -619,9 -624,7 +624,7 @@@ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, };
- /** - * enum ethtool_link_ext_substate_logical_mismatch - more information in addition - * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. + /* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. */ enum ethtool_link_ext_substate_link_logical_mismatch { ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, @@@ -631,19 -634,14 +634,14 @@@ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, };
- /** - * enum ethtool_link_ext_substate_bad_signal_integrity - more information in - * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. + /* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. */ enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, };
- /** - * enum ethtool_link_ext_substate_cable_issue - more information in - * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. - */ + /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ enum ethtool_link_ext_substate_cable_issue { ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, @@@ -661,6 -659,7 +659,7 @@@ * now deprecated * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_TUNABLES: tunable names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS * @ETH_SS_PHY_TUNABLES: PHY tunable names * @ETH_SS_LINK_MODES: link mode names @@@ -670,6 -669,8 +669,8 @@@ * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types + * + * @ETH_SS_COUNT: number of defined string sets */ enum ethtool_stringset { ETH_SS_TEST = 0, @@@ -715,6 -716,7 +716,7 @@@ struct ethtool_gstrings /** * struct ethtool_sset_info - string set information * @cmd: Command number = %ETHTOOL_GSSET_INFO + * @reserved: Reserved for future use; see the note on reserved space. * @sset_mask: On entry, a bitmask of string sets to query, with bits * numbered according to &enum ethtool_stringset. On return, a * bitmask of those string sets queried that are supported. @@@ -759,6 -761,7 +761,7 @@@ enum ethtool_test_flags * @flags: A bitmask of flags from &enum ethtool_test_flags. Some * flags may be set by the user on entry; others may be set by * the driver on return. + * @reserved: Reserved for future use; see the note on reserved space. * @len: On return, the number of test results * @data: Array of test results * @@@ -959,6 -962,7 +962,7 @@@ union ethtool_flow_union * @vlan_etype: VLAN EtherType * @vlan_tci: VLAN tag control information * @data: user defined data + * @padding: Reserved for future use; see the note on reserved space. * * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT * is set in &struct ethtool_rx_flow_spec @flow_type. @@@ -1134,7 -1138,8 +1138,8 @@@ struct ethtool_rxfh_indir * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. - * @rsvd: Reserved for future extensions. + * @rsvd8: Reserved for future use; see the note on reserved space. + * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table * of @indir_size __u32 elements, followed by hash key of @key_size * bytes. @@@ -1302,7 -1307,9 +1307,9 @@@ struct ethtool_sfeatures * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @tx_reserved: Reserved for future use; see the note on reserved space. * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + * @rx_reserved: Reserved for future use; see the note on reserved space. * * The bits in the 'tx_types' and 'rx_filters' fields correspond to * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, @@@ -1376,33 -1383,15 +1383,33 @@@ struct ethtool_per_queue_op };
/** - * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * struct ethtool_fecparam - Ethernet Forward Error Correction parameters * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM - * @active_fec: FEC mode which is active on porte - * @fec: Bitmask of supported/configured FEC modes - * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * @active_fec: FEC mode which is active on the port, single bit set, GET only. + * @fec: Bitmask of configured FEC modes. + * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. * - * Drivers should reject a non-zero setting of @autoneg when - * autoneogotiation is disabled (or not supported) for the link. + * Note that @reserved was never validated on input and ethtool user space + * left it uninitialized when calling SET. Hence going forward it can only be + * used to return a value to userspace with GET. + * + * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. + * FEC settings are configured by link autonegotiation whenever it's enabled. + * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. + * + * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. + * It is recommended that drivers only accept a single bit set in @fec. + * When multiple bits are set in @fec drivers may pick mode in an implementation + * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other + * FEC modes, because it's unclear whether in this case other modes constrain + * AUTO or are independent choices. + * Drivers must reject SET requests if they support none of the requested modes. + * + * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead + * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. * + * See enum ethtool_fec_config_bits for definition of valid bits for both + * @fec and @active_fec. */ struct ethtool_fecparam { __u32 cmd; @@@ -1414,16 -1403,11 +1421,16 @@@
/** * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration - * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported - * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver - * @ETHTOOL_FEC_OFF: No FEC Mode - * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode - * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not + * be used together with other bits. GET only. + * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually + * based link mode and SFP parameters read from module's + * EEPROM. This bit does _not_ mean autonegotiation. + * @ETHTOOL_FEC_OFF_BIT: No FEC Mode + * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode + * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode + * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet + * Consortium) */ enum ethtool_fec_config_bits { ETHTOOL_FEC_NONE_BIT, @@@ -1981,6 -1965,11 +1988,11 @@@ enum ethtool_reset_flags * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. + * @reserved: Reserved for future use; see the note on reserved space. + * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. * * If autonegotiation is disabled, the speed and @duplex represent the * fixed link mode and are writable if the driver supports multiple diff --combined kernel/bpf/disasm.c index dad821c8ecd0,faa54d58972c..bbfc6bb79240 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@@ -19,23 -19,16 +19,23 @@@ static const char *__func_get_name(cons { BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
- if (insn->src_reg != BPF_PSEUDO_CALL && + if (!insn->src_reg && insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && func_id_str[insn->imm]) return func_id_str[insn->imm];
- if (cbs && cbs->cb_call) - return cbs->cb_call(cbs->private_data, insn); + if (cbs && cbs->cb_call) { + const char *res; + + res = cbs->cb_call(cbs->private_data, insn); + if (res) + return res; + }
if (insn->src_reg == BPF_PSEUDO_CALL) snprintf(buff, len, "%+d", insn->imm); + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + snprintf(buff, len, "kernel-function");
return buff; } @@@ -91,7 -84,7 +91,7 @@@ static const char *const bpf_atomic_alu [BPF_ADD >> 4] = "add", [BPF_AND >> 4] = "and", [BPF_OR >> 4] = "or", - [BPF_XOR >> 4] = "or", + [BPF_XOR >> 4] = "xor", };
static const char *const bpf_ldst_string[] = { diff --combined kernel/bpf/verifier.c index 852541a435ef,3a738724a380..f63b27574b3a --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -234,18 -234,6 +234,18 @@@ static bool bpf_pseudo_call(const struc insn->src_reg == BPF_PSEUDO_CALL; }
+static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == BPF_PSEUDO_KFUNC_CALL; +} + +static bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@@ -260,7 -248,6 +260,7 @@@ u32 btf_id; struct btf *ret_btf; u32 ret_btf_id; + u32 subprogno; };
struct btf *btf_vmlinux; @@@ -403,24 -390,6 +403,24 @@@ __printf(3, 4) static void verbose_linf env->prev_linfo = linfo; }
+static void verbose_invalid_scalar(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + struct tnum *range, const char *ctx, + const char *reg_name) +{ + char tn_buf[48]; + + verbose(env, "At %s the register %s ", ctx, reg_name); + if (!tnum_is_unknown(reg->var_off)) { + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "has value %s", tn_buf); + } else { + verbose(env, "has unknown scalar value"); + } + tnum_strn(tn_buf, sizeof(tn_buf), *range); + verbose(env, " should have been in %s\n", tn_buf); +} + static bool type_is_pkt_pointer(enum bpf_reg_type type) { return type == PTR_TO_PACKET || @@@ -440,7 -409,6 +440,7 @@@ static bool reg_type_not_null(enum bpf_ return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || + type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON; }
@@@ -483,8 -451,7 +483,8 @@@ static bool arg_type_may_be_null(enum b type == ARG_PTR_TO_MEM_OR_NULL || type == ARG_PTR_TO_CTX_OR_NULL || type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || + type == ARG_PTR_TO_STACK_OR_NULL; }
/* Determine whether the function releases some resources allocated by another @@@ -574,8 -541,6 +574,8 @@@ static const char * const reg_type_str[ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", [PTR_TO_RDWR_BUF] = "rdwr_buf", [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", };
static char slot_type_char[] = { @@@ -647,7 -612,6 +647,7 @@@ static void print_verifier_state(struc if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_KEY || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) verbose(env, ",ks=%d,vs=%d", @@@ -1555,210 -1519,39 +1555,210 @@@ static int add_subprog(struct bpf_verif } ret = find_subprog(env, off); if (ret >= 0) - return 0; + return ret; if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { verbose(env, "too many subprograms\n"); return -E2BIG; } + /* determine subprog starts. The end is one before the next starts */ env->subprog_info[env->subprog_cnt++].start = off; sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL); + return env->subprog_cnt - 1; +} + +struct bpf_kfunc_desc { + struct btf_func_model func_model; + u32 func_id; + s32 imm; +}; + +#define MAX_KFUNC_DESCS 256 +struct bpf_kfunc_desc_tab { + struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; + u32 nr_descs; +}; + +static int kfunc_desc_cmp_by_id(const void *a, const void *b) +{ + const struct bpf_kfunc_desc *d0 = a; + const struct bpf_kfunc_desc *d1 = b; + + /* func_id is not greater than BTF_MAX_TYPE */ + return d0->func_id - d1->func_id; +} + +static const struct bpf_kfunc_desc * +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) +{ + struct bpf_kfunc_desc desc = { + .func_id = func_id, + }; + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + return bsearch(&desc, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); +} + +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) +{ + const struct btf_type *func, *func_proto; + struct bpf_kfunc_desc_tab *tab; + struct bpf_prog_aux *prog_aux; + struct bpf_kfunc_desc *desc; + const char *func_name; + unsigned long addr; + int err; + + prog_aux = env->prog->aux; + tab = prog_aux->kfunc_tab; + if (!tab) { + if (!btf_vmlinux) { + verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); + return -ENOTSUPP; + } + + if (!env->prog->jit_requested) { + verbose(env, "JIT is required for calling kernel function\n"); + return -ENOTSUPP; + } + + if (!bpf_jit_supports_kfunc_call()) { + verbose(env, "JIT does not support calling kernel function\n"); + return -ENOTSUPP; + } + + if (!env->prog->gpl_compatible) { + verbose(env, "cannot call kernel function from non-GPL compatible program\n"); + return -EINVAL; + } + + tab = kzalloc(sizeof(*tab), GFP_KERNEL); + if (!tab) + return -ENOMEM; + prog_aux->kfunc_tab = tab; + } + + if (find_kfunc_desc(env->prog, func_id)) + return 0; + + if (tab->nr_descs == MAX_KFUNC_DESCS) { + verbose(env, "too many different kernel function calls\n"); + return -E2BIG; + } + + func = btf_type_by_id(btf_vmlinux, func_id); + if (!func || !btf_type_is_func(func)) { + verbose(env, "kernel btf_id %u is not a function\n", + func_id); + return -EINVAL; + } + func_proto = btf_type_by_id(btf_vmlinux, func->type); + if (!func_proto || !btf_type_is_func_proto(func_proto)) { + verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", + func_id); + return -EINVAL; + } + + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + addr = kallsyms_lookup_name(func_name); + if (!addr) { + verbose(env, "cannot find address for kernel function %s\n", + func_name); + return -EINVAL; + } + + desc = &tab->descs[tab->nr_descs++]; + desc->func_id = func_id; + desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base; + err = btf_distill_func_proto(&env->log, btf_vmlinux, + func_proto, func_name, + &desc->func_model); + if (!err) + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_id, NULL); + return err; +} + +static int kfunc_desc_cmp_by_imm(const void *a, const void *b) +{ + const struct bpf_kfunc_desc *d0 = a; + const struct bpf_kfunc_desc *d1 = b; + + if (d0->imm > d1->imm) + return 1; + else if (d0->imm < d1->imm) + return -1; return 0; }
-static int check_subprogs(struct bpf_verifier_env *env) +static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) +{ + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + if (!tab) + return; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_imm, NULL); +} + +bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) +{ + return !!prog->aux->kfunc_tab; +} + +const struct btf_func_model * +bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn) +{ + const struct bpf_kfunc_desc desc = { + .imm = insn->imm, + }; + const struct bpf_kfunc_desc *res; + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + res = bsearch(&desc, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); + + return res ? &res->func_model : NULL; +} + +static int add_subprog_and_kfunc(struct bpf_verifier_env *env) { - int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; + int i, ret, insn_cnt = env->prog->len;
/* Add entry function. */ ret = add_subprog(env, 0); - if (ret < 0) + if (ret) return ret;
- /* determine subprog starts. The end is one before the next starts */ - for (i = 0; i < insn_cnt; i++) { - if (!bpf_pseudo_call(insn + i)) + for (i = 0; i < insn_cnt; i++, insn++) { + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) && + !bpf_pseudo_kfunc_call(insn)) continue; + if (!env->bpf_capable) { - verbose(env, - "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); return -EPERM; } - ret = add_subprog(env, i + insn[i].imm + 1); + + if (bpf_pseudo_func(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + if (ret >= 0) + /* remember subprog */ + insn[1].imm = ret; + } else if (bpf_pseudo_call(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + } else { + ret = add_kfunc_call(env, insn->imm); + } + if (ret < 0) return ret; } @@@ -1772,16 -1565,6 +1772,16 @@@ for (i = 0; i < env->subprog_cnt; i++) verbose(env, "func#%d @%d\n", i, subprog[i].start);
+ return 0; +} + +static int check_subprogs(struct bpf_verifier_env *env) +{ + int i, subprog_start, subprog_end, off, cur_subprog = 0; + struct bpf_subprog_info *subprog = env->subprog_info; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + /* now check that all jumps are within the same subprog */ subprog_start = subprog[cur_subprog].start; subprog_end = subprog[cur_subprog + 1].start; @@@ -2090,17 -1873,6 +2090,17 @@@ static int get_prev_insn_idx(struct bpf return i; }
+static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) +{ + const struct btf_type *func; + + if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) + return NULL; + + func = btf_type_by_id(btf_vmlinux, insn->imm); + return btf_name_by_offset(btf_vmlinux, func->name_off); +} + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@@ -2109,7 -1881,6 +2109,7 @@@ static int backtrack_insn(struct bpf_ve u32 *reg_mask, u64 *stack_mask) { const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, .cb_print = verbose, .private_data = env, }; @@@ -2524,8 -2295,6 +2524,8 @@@ static bool is_spillable_regtype(enum b case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: case PTR_TO_MEM_OR_NULL: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: return true; default: return false; @@@ -3130,10 -2899,6 +3130,10 @@@ static int __check_mem_access(struct bp
reg = &cur_regs(env)[regno]; switch (reg->type) { + case PTR_TO_MAP_KEY: + verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", + mem_size, off, size); + break; case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); @@@ -3539,9 -3304,6 +3539,9 @@@ static int check_ptr_alignment(struct b case PTR_TO_FLOW_KEYS: pointer_desc = "flow keys "; break; + case PTR_TO_MAP_KEY: + pointer_desc = "key "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@@ -3643,7 -3405,7 +3643,7 @@@ process_func continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { - if (!bpf_pseudo_call(insn + i)) + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ ret_insn[frame] = i + 1; @@@ -4080,19 -3842,7 +4080,19 @@@ static int check_mem_access(struct bpf_ /* for access checks, reg->off is just part of off */ off += reg->off;
- if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_KEY) { + if (t == BPF_WRITE) { + verbose(env, "write to change key R%d not allowed\n", regno); + return -EACCES; + } + + err = check_mem_region_access(env, regno, off, size, + reg->map_ptr->key_size, false); + if (err) + return err; + if (value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@@ -4508,9 -4258,6 +4508,9 @@@ static int check_helper_mem_access(stru case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_MAP_KEY: + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, regno, reg->off, access_size, meta && meta->raw_mode ? BPF_WRITE : @@@ -4727,7 -4474,6 +4727,7 @@@ static const struct bpf_reg_types map_k PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4759,7 -4505,6 +4759,7 @@@ static const struct bpf_reg_types mem_t PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, PTR_TO_RDONLY_BUF, @@@ -4772,7 -4517,6 +4772,7 @@@ static const struct bpf_reg_types int_p PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4785,8 -4529,6 +4785,8 @@@ static const struct bpf_reg_types const static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; +static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; +static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@@ -4815,8 -4557,6 +4815,8 @@@ [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, };
static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@@ -4998,8 -4738,6 +4998,8 @@@ skip_type_check verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_FUNC) { + meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. @@@ -5520,19 -5258,13 +5520,19 @@@ static void clear_caller_saved_regs(str } }
-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) +typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx); + +static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) { struct bpf_verifier_state *state = env->cur_state; struct bpf_func_info_aux *func_info_aux; struct bpf_func_state *caller, *callee; - int i, err, subprog, target_insn; + int err; bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) { @@@ -5541,6 -5273,14 +5541,6 @@@ return -E2BIG; }
- target_insn = *insn_idx + insn->imm; - subprog = find_subprog(env, target_insn + 1); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn + 1); - return -EFAULT; - } - caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", @@@ -5551,7 -5291,7 +5551,7 @@@ func_info_aux = env->prog->aux->func_info_aux; if (func_info_aux) is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, subprog, caller->regs); + err = btf_check_subprog_arg_match(env, subprog, caller->regs); if (err == -EFAULT) return err; if (is_global) { @@@ -5595,9 -5335,11 +5595,9 @@@ if (err) return err;
- /* copy r1 - r5 args that callee can access. The copy includes parent - * pointers, which connects us up to the liveness chain - */ - for (i = BPF_REG_1; i <= BPF_REG_5; i++) - callee->regs[i] = caller->regs[i]; + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) + return err;
clear_caller_saved_regs(env, caller->regs);
@@@ -5605,7 -5347,7 +5605,7 @@@ state->curframe++;
/* and go analyze first insn of the callee */ - *insn_idx = target_insn; + *insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); @@@ -5616,92 -5358,6 +5616,92 @@@ return 0; }
+int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee) +{ + /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, + * void *callback_ctx, u64 flags); + * callback_fn(struct bpf_map *map, void *key, void *value, + * void *callback_ctx); + */ + callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + /* pointer to stack or null */ + callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + return 0; +} + +static int set_callee_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, int insn_idx) +{ + int i; + + /* copy r1 - r5 args that callee can access. The copy includes parent + * pointers, which connects us up to the liveness chain + */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + callee->regs[i] = caller->regs[i]; + return 0; +} + +static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) +{ + int subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", + target_insn); + return -EFAULT; + } + + return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); +} + +static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map; + int err; + + if (bpf_map_ptr_poisoned(insn_aux)) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + + map = BPF_MAP_PTR(insn_aux->map_ptr_state); + if (!map->ops->map_set_for_each_callback_args || + !map->ops->map_for_each_callback) { + verbose(env, "callback function not allowed for map\n"); + return -ENOTSUPP; + } + + err = map->ops->map_set_for_each_callback_args(env, caller, callee); + if (err) + return err; + + callee->in_callback_fn = true; + return 0; +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@@ -5724,22 -5380,8 +5724,22 @@@
state->curframe--; caller = state->frame[state->curframe]; - /* return to the caller whatever r0 had in the callee */ - caller->regs[BPF_REG_0] = *r0; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); + + if (r0->type != SCALAR_VALUE) { + verbose(env, "R0 not a scalar value\n"); + return -EACCES; + } + if (!tnum_in(range, r0->var_off)) { + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + }
/* Transfer references to the caller */ err = transfer_reference_state(caller, callee); @@@ -5794,9 -5436,7 +5794,9 @@@ record_func_map(struct bpf_verifier_en func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem && - func_id != BPF_FUNC_map_peek_elem) + func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_for_each_map_elem && + func_id != BPF_FUNC_redirect_map) return 0;
if (map == NULL) { @@@ -5877,18 -5517,15 +5877,18 @@@ static int check_reference_leak(struct return state->acquired_refs ? -EINVAL : 0; }
-static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) +static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; bool changes_data; - int i, err; + int i, err, func_id;
/* find function prototype */ + func_id = insn->imm; if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); @@@ -5934,7 -5571,7 +5934,7 @@@
meta.func_id = func_id; /* check args */ - for (i = 0; i < 5; i++) { + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { err = check_func_arg(env, i, &meta, fn); if (err) return err; @@@ -5984,13 -5621,6 +5984,13 @@@ return -EINVAL; }
+ if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@@ -6146,98 -5776,6 +6146,98 @@@ return 0; }
+/* mark_btf_func_reg_size() is used when the reg size is determined by + * the BTF func_proto's return value size and argument. + */ +static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, + size_t reg_size) +{ + struct bpf_reg_state *reg = &cur_regs(env)[regno]; + + if (regno == BPF_REG_0) { + /* Function return value */ + reg->live |= REG_LIVE_WRITTEN; + reg->subreg_def = reg_size == sizeof(u64) ? + DEF_NOT_SUBREG : env->insn_idx + 1; + } else { + /* Function argument */ + if (reg_size == sizeof(u64)) { + mark_insn_zext(env, reg); + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + } else { + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32); + } + } +} + +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) +{ + const struct btf_type *t, *func, *func_proto, *ptr_type; + struct bpf_reg_state *regs = cur_regs(env); + const char *func_name, *ptr_type_name; + u32 i, nargs, func_id, ptr_type_id; + const struct btf_param *args; + int err; + + func_id = insn->imm; + func = btf_type_by_id(btf_vmlinux, func_id); + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_proto = btf_type_by_id(btf_vmlinux, func->type); + + if (!env->ops->check_kfunc_call || + !env->ops->check_kfunc_call(func_id)) { + verbose(env, "calling kernel function %s is not allowed\n", + func_name); + return -EACCES; + } + + /* Check the arguments */ + err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + if (err) + return err; + + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(env, regs, caller_saved[i]); + + /* Check return type */ + t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + if (btf_type_is_scalar(t)) { + mark_reg_unknown(env, regs, BPF_REG_0); + mark_btf_func_reg_size(env, BPF_REG_0, t->size); + } else if (btf_type_is_ptr(t)) { + ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + &ptr_type_id); + if (!btf_type_is_struct(ptr_type)) { + ptr_type_name = btf_name_by_offset(btf_vmlinux, + ptr_type->name_off); + verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", + func_name, btf_type_str(ptr_type), + ptr_type_name); + return -EINVAL; + } + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].type = PTR_TO_BTF_ID; + regs[BPF_REG_0].btf_id = ptr_type_id; + mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ + + nargs = btf_type_vlen(func_proto); + args = (const struct btf_param *)(func_proto + 1); + for (i = 0; i < nargs; i++) { + u32 regno = i + 1; + + t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + if (btf_type_is_ptr(t)) + mark_btf_func_reg_size(env, regno, sizeof(void *)); + else + /* scalar. ensured by btf_check_kfunc_arg_match() */ + mark_btf_func_reg_size(env, regno, t->size); + } + + return 0; +} + static bool signed_add_overflows(s64 a, s64 b) { /* Do the add in u64, where overflow is well-defined */ @@@ -6371,7 -5909,7 +6371,7 @@@ static int update_alu_sanitation_state( aux->alu_limit != alu_limit)) return -EACCES;
- /* Corresponding fixup done in fixup_bpf_calls(). */ + /* Corresponding fixup done in do_misc_fixups(). */ aux->alu_state = alu_state; aux->alu_limit = alu_limit; return 0; @@@ -8725,24 -8263,6 +8725,24 @@@ static int check_ld_imm(struct bpf_veri return 0; }
+ if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; + u32 subprogno = insn[1].imm; + + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); + return -EINVAL; + } + if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { + verbose(env, "callback function not static\n"); + return -EINVAL; + } + + dst_reg->type = PTR_TO_FUNC; + dst_reg->subprogno = subprogno; + return 0; + } + map = env->used_maps[aux->map_index]; mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; @@@ -8971,7 -8491,17 +8971,7 @@@ static int check_return_code(struct bpf }
if (!tnum_in(range, reg->var_off)) { - char tn_buf[48]; - - verbose(env, "At program exit the register R0 "); - if (!tnum_is_unknown(reg->var_off)) { - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "has value %s", tn_buf); - } else { - verbose(env, "has unknown scalar value"); - } - tnum_strn(tn_buf, sizeof(tn_buf), range); - verbose(env, " should have been in %s\n", tn_buf); + verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); return -EINVAL; }
@@@ -9098,27 -8628,6 +9098,27 @@@ static int push_insn(int t, int w, int return DONE_EXPLORING; }
+static int visit_func_call_insn(int t, int insn_cnt, + struct bpf_insn *insns, + struct bpf_verifier_env *env, + bool visit_callee) +{ + int ret; + + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + if (ret) + return ret; + + if (t + 1 < insn_cnt) + init_explored_state(env, t + 1); + if (visit_callee) { + init_explored_state(env, t); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, + env, false); + } + return ret; +} + /* Visits the instruction at index t and returns one of the following: * < 0 - an error occurred * DONE_EXPLORING - the instruction was fully explored @@@ -9129,9 -8638,6 +9129,9 @@@ static int visit_insn(int t, int insn_c struct bpf_insn *insns = env->prog->insnsi; int ret;
+ if (bpf_pseudo_func(insns + t)) + return visit_func_call_insn(t, insn_cnt, insns, env, true); + /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insns[t].code) != BPF_JMP && BPF_CLASS(insns[t].code) != BPF_JMP32) @@@ -9142,8 -8648,18 +9142,8 @@@ return DONE_EXPLORING;
case BPF_CALL: - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); - if (ret) - return ret; - - if (t + 1 < insn_cnt) - init_explored_state(env, t + 1); - if (insns[t].src_reg == BPF_PSEUDO_CALL) { - init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, - env, false); - } - return ret; + return visit_func_call_insn(t, insn_cnt, insns, env, + insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA: if (BPF_SRC(insns[t].code) != BPF_K) @@@ -9756,7 -9272,6 +9756,7 @@@ static bool regsafe(struct bpf_reg_stat */ return false; } + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. @@@ -10440,7 -9955,6 +10440,7 @@@ static int do_check(struct bpf_verifier
if (env->log.level & BPF_LOG_LEVEL) { const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, .cb_print = verbose, .private_data = env, }; @@@ -10588,8 -10102,7 +10588,8 @@@ if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 || (insn->src_reg != BPF_REG_0 && - insn->src_reg != BPF_PSEUDO_CALL) || + insn->src_reg != BPF_PSEUDO_CALL && + insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) { verbose(env, "BPF_CALL uses reserved fields\n"); @@@ -10604,12 -10117,11 +10604,12 @@@ } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + err = check_kfunc_call(env, insn); else - err = check_helper_call(env, insn->imm, env->insn_idx); + err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; - } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || @@@ -11038,12 -10550,6 +11038,12 @@@ static int resolve_pseudo_ldimm64(struc goto next_insn; }
+ if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@@ -11176,13 -10682,9 +11176,13 @@@ static void convert_pseudo_ld_imm64(str int insn_cnt = env->prog->len; int i;
- for (i = 0; i < insn_cnt; i++, insn++) - if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) - insn->src_reg = 0; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + if (insn->src_reg == BPF_PSEUDO_FUNC) + continue; + insn->src_reg = 0; + } }
/* single env->prog->insni[off] instruction was replaced with the range @@@ -11821,12 -11323,6 +11821,12 @@@ static int jit_subprogs(struct bpf_veri return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ + continue; + } + if (!bpf_pseudo_call(insn)) continue; /* Upon error here we cannot fall back to interpreter but @@@ -11916,7 -11412,6 +11916,7 @@@ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@@ -11957,12 -11452,6 +11957,12 @@@ for (i = 0; i < env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { + subprog = insn[1].imm; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; + } if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; @@@ -12008,11 -11497,6 +12008,11 @@@ * later look the same as if they were interpreted only. */ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + continue; + } if (!bpf_pseudo_call(insn)) continue; insn->off = env->insn_aux_data[i].call_imm; @@@ -12024,7 -11508,7 +12024,7 @@@ prog->bpf_func = func[0]->bpf_func; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; - bpf_prog_free_unused_jited_linfo(prog); + bpf_prog_jit_attempt_done(prog); return 0; out_free: for (i = 0; i < env->subprog_cnt; i++) { @@@ -12047,7 -11531,7 +12047,7 @@@ out_undo_insn insn->off = 0; insn->imm = env->insn_aux_data[i].call_imm; } - bpf_prog_free_jited_linfo(prog); + bpf_prog_jit_attempt_done(prog); return err; }
@@@ -12056,7 -11540,6 +12056,7 @@@ static int fixup_call_args(struct bpf_v #ifndef CONFIG_BPF_JIT_ALWAYS_ON struct bpf_prog *prog = env->prog; struct bpf_insn *insn = prog->insnsi; + bool has_kfunc_call = bpf_prog_has_kfunc_call(prog); int i, depth; #endif int err = 0; @@@ -12070,10 -11553,6 +12070,10 @@@ return err; } #ifndef CONFIG_BPF_JIT_ALWAYS_ON + if (has_kfunc_call) { + verbose(env, "calling kernel functions are not allowed in non-JITed programs\n"); + return -EINVAL; + } if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { /* When JIT fails the progs with bpf2bpf calls and tail_calls * have to be rejected, since interpreter doesn't support them yet. @@@ -12082,14 -11561,6 +12082,14 @@@ return -EINVAL; } for (i = 0; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* When JIT fails the progs with callback calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "callbacks are not allowed in non-JITed programs\n"); + return -EINVAL; + } + if (!bpf_pseudo_call(insn)) continue; depth = get_callee_stack_depth(env, insn, i); @@@ -12102,30 -11573,12 +12102,30 @@@ return err; }
-/* fixup insn->imm field of bpf_call instructions - * and inline eligible helpers as explicit sequence of BPF instructions - * - * this function is called after eBPF program passed verification +static int fixup_kfunc_call(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + const struct bpf_kfunc_desc *desc; + + /* insn->imm has the btf func_id. Replace it with + * an address (relative to __bpf_base_call). + */ + desc = find_kfunc_desc(env->prog, insn->imm); + if (!desc) { + verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", + insn->imm); + return -EFAULT; + } + + insn->imm = desc->imm; + + return 0; +} + +/* Do various post-verification rewrites in a single program pass. + * These rewrites simplify JIT and interpreter implementations. */ -static int fixup_bpf_calls(struct bpf_verifier_env *env) +static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; bool expect_blinding = bpf_jit_blinding_enabled(prog); @@@ -12140,7 -11593,6 +12140,7 @@@ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) { + /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || @@@ -12181,7 -11633,6 +12181,7 @@@ continue; }
+ /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { @@@ -12201,11 -11652,11 +12201,11 @@@ continue; }
+ /* Rewrite pointer arithmetic to mitigate speculation attacks. */ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; - struct bpf_insn insn_buf[16]; struct bpf_insn *patch = &insn_buf[0]; bool issrc, isneg; u32 off_reg; @@@ -12257,12 -11708,6 +12257,12 @@@ continue; if (insn->src_reg == BPF_PSEUDO_CALL) continue; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + ret = fixup_kfunc_call(env, insn); + if (ret) + return ret; + continue; + }
if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; @@@ -12355,8 -11800,7 +12355,8 @@@ insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || - insn->imm == BPF_FUNC_map_peek_elem)) { + insn->imm == BPF_FUNC_map_peek_elem || + insn->imm == BPF_FUNC_redirect_map)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@@ -12398,9 -11842,6 +12398,9 @@@ (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_redirect, + (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: @@@ -12427,16 -11868,11 +12427,16 @@@ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base; continue; + case BPF_FUNC_redirect_map: + insn->imm = BPF_CAST_CALL(ops->map_redirect) - + __bpf_call_base; + continue; }
goto patch_call_imm; }
+ /* Implement bpf_jiffies64 inline. */ if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_jiffies64) { struct bpf_insn ld_jiffies_addr[2] = { @@@ -12492,8 -11928,6 +12492,8 @@@ patch_call_imm } }
+ sort_kfunc_descs_by_imm(env->prog); + return 0; }
@@@ -12604,7 -12038,7 +12604,7 @@@ static int do_check_common(struct bpf_v /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); - ret = btf_check_func_arg_match(env, subprog, regs); + ret = btf_check_subprog_arg_match(env, subprog, regs); if (ret == -EFAULT) /* unlikely verifier bug. abort. * ret == 0 and ret < 0 are sadly acceptable for @@@ -12724,6 -12158,11 +12724,11 @@@ static int check_struct_ops_btf_id(stru u32 btf_id, member_idx; const char *mname;
+ if (!prog->gpl_compatible) { + verbose(env, "struct ops programs must have a GPL compatible license\n"); + return -EINVAL; + } + btf_id = prog->aux->attach_btf_id; st_ops = bpf_struct_ops_find(btf_id); if (!st_ops) { @@@ -13199,10 -12638,6 +13204,10 @@@ int bpf_check(struct bpf_prog **prog, u if (!env->explored_states) goto skip_full_check;
+ ret = add_subprog_and_kfunc(env); + if (ret < 0) + goto skip_full_check; + ret = check_subprogs(env); if (ret < 0) goto skip_full_check; @@@ -13253,7 -12688,7 +13258,7 @@@ skip_full_check ret = convert_ctx_accesses(env);
if (ret == 0) - ret = fixup_bpf_calls(env); + ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. diff --combined kernel/fork.c index 50209691f21a,426cd0c51f9e..85ca68f4b01e --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -96,7 -96,6 +96,7 @@@ #include <linux/kasan.h> #include <linux/scs.h> #include <linux/io_uring.h> +#include <linux/bpf.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@@ -735,7 -734,6 +735,7 @@@ void __put_task_struct(struct task_stru cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); + bpf_task_storage_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); @@@ -1950,8 -1948,14 +1950,14 @@@ static __latent_entropy struct task_str p = dup_task_struct(current, node); if (!p) goto fork_out; - if (args->io_thread) + if (args->io_thread) { + /* + * Mark us an IO worker, and block any signal that isn't + * fatal or STOP + */ p->flags |= PF_IO_WORKER; + siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); + }
/* * This _must_ happen before we call free_task(), i.e. before we jump @@@ -2074,9 -2078,6 +2080,9 @@@ p->sequential_io = 0; p->sequential_io_avg = 0; #endif +#ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(p->bpf_storage, NULL); +#endif
/* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); @@@ -2443,14 -2444,8 +2449,8 @@@ struct task_struct *create_io_thread(in .stack_size = (unsigned long)arg, .io_thread = 1, }; - struct task_struct *tsk;
- tsk = copy_process(NULL, 0, node, &args); - if (!IS_ERR(tsk)) { - sigfillset(&tsk->blocked); - sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); - } - return tsk; + return copy_process(NULL, 0, node, &args); }
/* diff --combined net/core/dev.c index 33ff4a944109,af8c1ea040b9..cc5df273f766 --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -848,52 -848,6 +848,52 @@@ int dev_fill_metadata_dst(struct net_de } EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack) +{ + int k = stack->num_paths++; + + if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX)) + return NULL; + + return &stack->path[k]; +} + +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, + struct net_device_path_stack *stack) +{ + const struct net_device *last_dev; + struct net_device_path_ctx ctx = { + .dev = dev, + .daddr = daddr, + }; + struct net_device_path *path; + int ret = 0; + + stack->num_paths = 0; + while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { + last_dev = ctx.dev; + path = dev_fwd_path(stack); + if (!path) + return -1; + + memset(path, 0, sizeof(struct net_device_path)); + ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path); + if (ret < 0) + return -1; + + if (WARN_ON_ONCE(last_dev == ctx.dev)) + return -1; + } + path = dev_fwd_path(stack); + if (!path) + return -1; + path->type = DEV_PATH_ETHERNET; + path->dev = ctx.dev; + + return ret; +} +EXPORT_SYMBOL_GPL(dev_fill_forward_path); + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace @@@ -2509,14 -2463,16 +2509,14 @@@ int netdev_txq_to_tc(struct net_device EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS -struct static_key xps_needed __read_mostly; -EXPORT_SYMBOL(xps_needed); -struct static_key xps_rxqs_needed __read_mostly; -EXPORT_SYMBOL(xps_rxqs_needed); +static struct static_key xps_needed __read_mostly; +static struct static_key xps_rxqs_needed __read_mostly; static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
static bool remove_xps_queue(struct xps_dev_maps *dev_maps, - int tci, u16 index) + struct xps_dev_maps *old_maps, int tci, u16 index) { struct xps_map *map = NULL; int pos; @@@ -2535,8 -2491,6 +2535,8 @@@ break; }
+ if (old_maps) + RCU_INIT_POINTER(old_maps->attr_map[tci], NULL); RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); return false; @@@ -2549,7 -2503,7 +2549,7 @@@ static bool remove_xps_queue_cpu(struc struct xps_dev_maps *dev_maps, int cpu, u16 offset, u16 count) { - int num_tc = dev->num_tc ? : 1; + int num_tc = dev_maps->num_tc; bool active = false; int tci;
@@@ -2557,7 -2511,7 +2557,7 @@@ int i, j;
for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, tci, j)) + if (!remove_xps_queue(dev_maps, NULL, tci, j)) break; }
@@@ -2569,54 -2523,74 +2569,54 @@@
static void reset_xps_maps(struct net_device *dev, struct xps_dev_maps *dev_maps, - bool is_rxqs_map) + enum xps_map_type type) { - if (is_rxqs_map) { - static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - } else { - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); - } static_key_slow_dec_cpuslocked(&xps_needed); + if (type == XPS_RXQS) + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); + + RCU_INIT_POINTER(dev->xps_maps[type], NULL); + kfree_rcu(dev_maps, rcu); }
-static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, - struct xps_dev_maps *dev_maps, unsigned int nr_ids, - u16 offset, u16 count, bool is_rxqs_map) +static void clean_xps_maps(struct net_device *dev, enum xps_map_type type, + u16 offset, u16 count) { + struct xps_dev_maps *dev_maps; bool active = false; int i, j;
- for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), - j < nr_ids;) - active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, - count); + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (!dev_maps) + return; + + for (j = 0; j < dev_maps->nr_ids; j++) + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type);
- if (!is_rxqs_map) { - for (i = offset + (count - 1); count--; i--) { + if (type == XPS_CPUS) { + for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write( - netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); - } + netdev_get_tx_queue(dev, i), NUMA_NO_NODE); } }
static void netif_reset_xps_queues(struct net_device *dev, u16 offset, u16 count) { - const unsigned long *possible_mask = NULL; - struct xps_dev_maps *dev_maps; - unsigned int nr_ids; - if (!static_key_false(&xps_needed)) return;
cpus_read_lock(); mutex_lock(&xps_map_mutex);
- if (static_key_false(&xps_rxqs_needed)) { - dev_maps = xmap_dereference(dev->xps_rxqs_map); - if (dev_maps) { - nr_ids = dev->num_rx_queues; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, - offset, count, true); - } - } - - dev_maps = xmap_dereference(dev->xps_cpus_map); - if (!dev_maps) - goto out_no_maps; + if (static_key_false(&xps_rxqs_needed)) + clean_xps_maps(dev, XPS_RXQS, offset, count);
- if (num_possible_cpus() > 1) - possible_mask = cpumask_bits(cpu_possible_mask); - nr_ids = nr_cpu_ids; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count, - false); + clean_xps_maps(dev, XPS_CPUS, offset, count);
-out_no_maps: mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } @@@ -2666,35 -2640,16 +2666,35 @@@ static struct xps_map *expand_xps_map(s return new_map; }
+/* Copy xps maps at a given index */ +static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps, + struct xps_dev_maps *new_dev_maps, int index, + int tc, bool skip_tc) +{ + int i, tci = index * dev_maps->num_tc; + struct xps_map *map; + + /* copy maps belonging to foreign traffic classes */ + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && skip_tc) + continue; + + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); + } +} + /* Must be called under cpus_read_lock */ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { - const unsigned long *online_mask = NULL, *possible_mask = NULL; - struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL; + const unsigned long *online_mask = NULL; + bool active = false, copy = false; int i, j, tci, numa_node_id = -2; int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - bool active = false; unsigned int nr_ids;
if (dev->num_tc) { @@@ -2712,48 -2667,38 +2712,48 @@@ }
mutex_lock(&xps_map_mutex); - if (is_rxqs_map) { + + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (type == XPS_RXQS) { maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); - dev_maps = xmap_dereference(dev->xps_rxqs_map); nr_ids = dev->num_rx_queues; } else { maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); - if (num_possible_cpus() > 1) { + if (num_possible_cpus() > 1) online_mask = cpumask_bits(cpu_online_mask); - possible_mask = cpumask_bits(cpu_possible_mask); - } - dev_maps = xmap_dereference(dev->xps_cpus_map); nr_ids = nr_cpu_ids; }
if (maps_sz < L1_CACHE_BYTES) maps_sz = L1_CACHE_BYTES;
+ /* The old dev_maps could be larger or smaller than the one we're + * setting up now, as dev->num_tc or nr_ids could have been updated in + * between. We could try to be smart, but let's be safe instead and only + * copy foreign traffic classes if the two map sizes match. + */ + if (dev_maps && + dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids) + copy = true; + /* allocate memory for queue storage */ for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids), j < nr_ids;) { - if (!new_dev_maps) - new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { - mutex_unlock(&xps_map_mutex); - return -ENOMEM; + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) { + mutex_unlock(&xps_map_mutex); + return -ENOMEM; + } + + new_dev_maps->nr_ids = nr_ids; + new_dev_maps->num_tc = num_tc; }
tci = j * num_tc + tc; - map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) : - NULL; + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
- map = expand_xps_map(map, j, index, is_rxqs_map); + map = expand_xps_map(map, j, index, type == XPS_RXQS); if (!map) goto error;
@@@ -2766,21 -2711,29 +2766,21 @@@ if (!dev_maps) { /* Increment static keys at most once per type */ static_key_slow_inc_cpuslocked(&xps_needed); - if (is_rxqs_map) + if (type == XPS_RXQS) static_key_slow_inc_cpuslocked(&xps_rxqs_needed); }
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - /* copy maps belonging to foreign traffic classes */ - for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + for (j = 0; j < nr_ids; j++) { + bool skip_tc = false;
- /* We need to explicitly update tci as prevous loop - * could break out early if dev_maps is NULL. - */ tci = j * num_tc + tc; - if (netif_attr_test_mask(j, mask, nr_ids) && netif_attr_test_online(j, online_mask, nr_ids)) { /* add tx-queue to CPU/rx-queue maps */ int pos = 0;
+ skip_tc = true; + map = xmap_dereference(new_dev_maps->attr_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@@ -2788,81 -2741,78 +2788,81 @@@ if (pos == map->len) map->queues[map->len++] = index; #ifdef CONFIG_NUMA - if (!is_rxqs_map) { + if (type == XPS_CPUS) { if (numa_node_id == -2) numa_node_id = cpu_to_node(j); else if (numa_node_id != cpu_to_node(j)) numa_node_id = -1; } #endif - } else if (dev_maps) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); }
- /* copy maps belonging to foreign traffic classes */ - for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + if (copy) + xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc, + skip_tc); }
- if (is_rxqs_map) - rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps); - else - rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps); + rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
/* Cleanup old maps */ if (!dev_maps) goto out_no_old_maps;
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = num_tc, tci = j * num_tc; i--; tci++) { - new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + for (j = 0; j < dev_maps->nr_ids; j++) { + for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) { map = xmap_dereference(dev_maps->attr_map[tci]); - if (map && map != new_map) - kfree_rcu(map, rcu); + if (!map) + continue; + + if (copy) { + new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + if (map == new_map) + continue; + } + + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); + kfree_rcu(map, rcu); } }
- kfree_rcu(dev_maps, rcu); + old_dev_maps = dev_maps;
out_no_old_maps: dev_maps = new_dev_maps; active = true;
out_no_new_maps: - if (!is_rxqs_map) { + if (type == XPS_CPUS) /* update Tx queue numa node */ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), (numa_node_id >= 0) ? numa_node_id : NUMA_NO_NODE); - }
if (!dev_maps) goto out_no_maps;
/* removes tx-queue from unused CPUs/rx-queues */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = tc, tci = j * num_tc; i--; tci++) - active |= remove_xps_queue(dev_maps, tci, index); - if (!netif_attr_test_mask(j, mask, nr_ids) || - !netif_attr_test_online(j, online_mask, nr_ids)) - active |= remove_xps_queue(dev_maps, tci, index); - for (i = num_tc - tc, tci++; --i; tci++) - active |= remove_xps_queue(dev_maps, tci, index); + for (j = 0; j < dev_maps->nr_ids; j++) { + tci = j * dev_maps->num_tc; + + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && + netif_attr_test_mask(j, mask, dev_maps->nr_ids) && + netif_attr_test_online(j, online_mask, dev_maps->nr_ids)) + continue; + + active |= remove_xps_queue(dev_maps, + copy ? old_dev_maps : NULL, + tci, index); + } }
+ if (old_dev_maps) + kfree_rcu(old_dev_maps, rcu); + /* free map if not active */ if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type);
out_no_maps: mutex_unlock(&xps_map_mutex); @@@ -2870,10 -2820,11 +2870,10 @@@ return 0; error: /* remove any maps that we added */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { + for (j = 0; j < nr_ids; j++) { for (i = num_tc, tci = j * num_tc; i--; tci++) { new_map = xmap_dereference(new_dev_maps->attr_map[tci]); - map = dev_maps ? + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL; if (new_map && new_map != map) @@@ -2894,7 -2845,7 +2894,7 @@@ int netif_set_xps_queue(struct net_devi int ret;
cpus_read_lock(); - ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); + ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS); cpus_read_unlock();
return ret; @@@ -4005,15 -3956,13 +4005,15 @@@ sch_handle_egress(struct sk_buff *skb, static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, struct xps_dev_maps *dev_maps, unsigned int tci) { + int tc = netdev_get_prio_tc_map(dev, skb->priority); struct xps_map *map; int queue_index = -1;
- if (dev->num_tc) { - tci *= dev->num_tc; - tci += netdev_get_prio_tc_map(dev, skb->priority); - } + if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids) + return queue_index; + + tci *= dev_maps->num_tc; + tci += tc;
map = rcu_dereference(dev_maps->attr_map[tci]); if (map) { @@@ -4044,18 -3993,18 +4044,18 @@@ static int get_xps_queue(struct net_dev if (!static_key_false(&xps_rxqs_needed)) goto get_cpus_map;
- dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]); if (dev_maps) { int tci = sk_rx_queue_get(sk);
- if (tci >= 0 && tci < dev->num_rx_queues) + if (tci >= 0) queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci); }
get_cpus_map: if (queue_index < 0) { - dev_maps = rcu_dereference(sb_dev->xps_cpus_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]); if (dev_maps) { unsigned int tci = skb->sender_cpu - 1;
@@@ -5335,7 -5284,6 +5335,7 @@@ skip_classify goto another_round; case RX_HANDLER_EXACT: deliver_exact = true; + break; case RX_HANDLER_PASS: break; default: @@@ -5928,13 -5876,15 +5928,13 @@@ void napi_gro_flush(struct napi_struct } EXPORT_SYMBOL(napi_gro_flush);
-static struct list_head *gro_list_prepare(struct napi_struct *napi, - struct sk_buff *skb) +static void gro_list_prepare(const struct list_head *head, + const struct sk_buff *skb) { unsigned int maclen = skb->dev->hard_header_len; u32 hash = skb_get_hash_raw(skb); - struct list_head *head; struct sk_buff *p;
- head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list; list_for_each_entry(p, head, list) { unsigned long diffs;
@@@ -5960,6 -5910,8 +5960,6 @@@ maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } - - return head; }
static void skb_gro_reset_offset(struct sk_buff *skb) @@@ -6022,11 -5974,11 +6022,11 @@@ static void gro_flush_oldest(struct nap
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + struct gro_list *gro_list = &napi->gro_hash[bucket]; struct list_head *head = &offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *gro_head; struct sk_buff *pp = NULL; enum gro_result ret; int same_flow; @@@ -6035,7 -5987,7 +6035,7 @@@ if (netif_elide_gro(skb->dev)) goto normal;
- gro_head = gro_list_prepare(napi, skb); + gro_list_prepare(&gro_list->list, skb);
rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@@ -6071,7 -6023,7 +6071,7 @@@
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, - gro_head, skb); + &gro_list->list, skb); break; } rcu_read_unlock(); @@@ -6090,7 -6042,7 +6090,7 @@@ if (pp) { skb_list_del_init(pp); napi_gro_complete(napi, pp); - napi->gro_hash[hash].count--; + gro_list->count--; }
if (same_flow) @@@ -6099,16 -6051,16 +6099,16 @@@ if (NAPI_GRO_CB(skb)->flush) goto normal;
- if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { - gro_flush_oldest(napi, gro_head); - } else { - napi->gro_hash[hash].count++; - } + if (unlikely(gro_list->count >= MAX_GRO_SKBS)) + gro_flush_oldest(napi, &gro_list->list); + else + gro_list->count++; + NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); - list_add(&skb->list, gro_head); + list_add(&skb->list, &gro_list->list); ret = GRO_HELD;
pull: @@@ -6116,11 -6068,11 +6116,11 @@@ if (grow > 0) gro_pull_from_frag0(skb, grow); ok: - if (napi->gro_hash[hash].count) { - if (!test_bit(hash, &napi->gro_bitmask)) - __set_bit(hash, &napi->gro_bitmask); - } else if (test_bit(hash, &napi->gro_bitmask)) { - __clear_bit(hash, &napi->gro_bitmask); + if (gro_list->count) { + if (!test_bit(bucket, &napi->gro_bitmask)) + __set_bit(bucket, &napi->gro_bitmask); + } else if (test_bit(bucket, &napi->gro_bitmask)) { + __clear_bit(bucket, &napi->gro_bitmask); }
return ret; @@@ -6837,7 -6789,6 +6837,7 @@@ int dev_set_threaded(struct net_device
return err; } +EXPORT_SYMBOL(dev_set_threaded);
void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) @@@ -7041,7 -6992,7 +7041,7 @@@ static int napi_thread_wait(struct napi
set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop() && !napi_disable_pending(napi)) { + while (!kthread_should_stop()) { /* Testing SCHED_THREADED bit here to make sure the current * kthread owns this napi and could poll on this napi. * Testing SCHED bit is not enough because SCHED bit might be @@@ -7059,6 -7010,7 +7059,7 @@@ set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); + return -1; }
@@@ -10385,20 -10337,14 +10386,20 @@@ EXPORT_SYMBOL(register_netdev)
int netdev_refcnt_read(const struct net_device *dev) { +#ifdef CONFIG_PCPU_DEV_REFCNT int i, refcnt = 0;
for_each_possible_cpu(i) refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); return refcnt; +#else + return refcount_read(&dev->dev_refcnt); +#endif } EXPORT_SYMBOL(netdev_refcnt_read);
+int netdev_unregister_timeout_secs __read_mostly = 10; + #define WAIT_REFS_MIN_MSECS 1 #define WAIT_REFS_MAX_MSECS 250 /** @@@ -10423,7 -10369,7 +10424,7 @@@ static void netdev_wait_allrefs(struct rebroadcast_time = warning_time = jiffies; refcnt = netdev_refcnt_read(dev);
- while (refcnt != 0) { + while (refcnt != 1) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { rtnl_lock();
@@@ -10460,9 -10406,7 +10461,9 @@@
refcnt = netdev_refcnt_read(dev);
- if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) { + if (refcnt != 1 && + time_after(jiffies, warning_time + + netdev_unregister_timeout_secs * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, refcnt); warning_time = jiffies; @@@ -10538,7 -10482,7 +10539,7 @@@ void netdev_run_todo(void netdev_wait_allrefs(dev);
/* paranoia */ - BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(netdev_refcnt_read(dev) != 1); BUG_ON(!list_empty(&dev->ptype_all)); BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); @@@ -10755,14 -10699,9 +10756,14 @@@ struct net_device *alloc_netdev_mqs(in dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p;
+#ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; + dev_hold(dev); +#else + refcount_set(&dev->dev_refcnt, 1); +#endif
if (dev_addr_init(dev)) goto free_pcpu; @@@ -10826,10 -10765,8 +10827,10 @@@ free_all return NULL;
free_pcpu: +#ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); free_dev: +#endif netdev_freemem(dev); return NULL; } @@@ -10871,10 -10808,8 +10872,10 @@@ void free_netdev(struct net_device *dev list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p);
+#ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; +#endif free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL;
@@@ -11062,13 -10997,11 +11063,13 @@@ void unregister_netdev(struct net_devic EXPORT_SYMBOL(unregister_netdev);
/** - * dev_change_net_namespace - move device to different nethost namespace + * __dev_change_net_namespace - move device to different nethost namespace * @dev: device * @net: network namespace * @pat: If not NULL name pattern to try if the current device name * is already taken in the destination network namespace. + * @new_ifindex: If not zero, specifies device index in the target + * namespace. * * This function shuts down a device interface and moves it * to a new network namespace. On success 0 is returned, on @@@ -11077,11 -11010,10 +11078,11 @@@ * Callers must hold the rtnl semaphore. */
-int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +int __dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat, int new_ifindex) { struct net *net_old = dev_net(dev); - int err, new_nsid, new_ifindex; + int err, new_nsid;
ASSERT_RTNL();
@@@ -11112,11 -11044,6 +11113,11 @@@ goto out; }
+ /* Check that new_ifindex isn't used yet. */ + err = -EBUSY; + if (new_ifindex && __dev_get_by_index(net, new_ifindex)) + goto out; + /* * And now a mini version of register_netdevice unregister_netdevice. */ @@@ -11144,12 -11071,10 +11145,12 @@@
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) - new_ifindex = dev_new_index(net); - else - new_ifindex = dev->ifindex; + if (!new_ifindex) { + if (__dev_get_by_index(net, dev->ifindex)) + new_ifindex = dev_new_index(net); + else + new_ifindex = dev->ifindex; + }
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, new_ifindex); @@@ -11202,7 -11127,7 +11203,7 @@@ out: return err; } -EXPORT_SYMBOL_GPL(dev_change_net_namespace); +EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
static int dev_cpu_dead(unsigned int oldcpu) { diff --combined net/core/rtnetlink.c index 9f1f55785a6f,3485b16a7ff3..714d5fa38546 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@@ -1877,7 -1877,6 +1877,7 @@@ static const struct nla_policy ifla_pol .len = ALTIFNAMSIZ - 1 }, [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, + [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), };
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@@ -2604,22 -2603,14 +2604,22 @@@ static int do_setlink(const struct sk_b return err;
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { - struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), - tb, CAP_NET_ADMIN); + struct net *net; + int new_ifindex; + + net = rtnl_link_get_net_capable(skb, dev_net(dev), + tb, CAP_NET_ADMIN); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; }
- err = dev_change_net_namespace(dev, net, ifname); + if (tb[IFLA_NEW_IFINDEX]) + new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]); + else + new_ifindex = 0; + + err = __dev_change_net_namespace(dev, net, ifname, new_ifindex); put_net(net); if (err) goto errout; @@@ -2872,7 -2863,7 +2872,7 @@@
BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
- err = af_ops->set_link_af(dev, af); + err = af_ops->set_link_af(dev, af, extack); if (err < 0) { rcu_read_unlock(); goto errout; diff --combined net/core/skmsg.c index 92a83c02562a,5def3a2e85be..43ce17a6a585 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@@ -399,104 -399,6 +399,104 @@@ out } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, + long timeo, int *err) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || + !skb_queue_empty(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} +EXPORT_SYMBOL_GPL(sk_msg_wait_data); + +/* Receive sk_msg from psock->ingress_msg to @msg. */ +int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + int len, int flags) +{ + struct iov_iter *iter = &msg->msg_iter; + int peek = flags & MSG_PEEK; + struct sk_msg *msg_rx; + int i, copied = 0; + + msg_rx = sk_psock_peek_msg(psock); + while (copied != len) { + struct scatterlist *sge; + + if (unlikely(!msg_rx)) + break; + + i = msg_rx->sg.start; + do { + struct page *page; + int copy; + + sge = sk_msg_elem(msg_rx, i); + copy = sge->length; + page = sg_page(sge); + if (copied + copy > len) + copy = len - copied; + copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (!copy) + return copied ? copied : -EFAULT; + + copied += copy; + if (likely(!peek)) { + sge->offset += copy; + sge->length -= copy; + if (!msg_rx->skb) + sk_mem_uncharge(sk, copy); + msg_rx->sg.size -= copy; + + if (!sge->length) { + sk_msg_iter_var_next(i); + if (!msg_rx->skb) + put_page(page); + } + } else { + /* Lets not optimize peek case if copy_page_to_iter + * didn't copy the entire length lets just break. + */ + if (copy != sge->length) + return copied; + sk_msg_iter_var_next(i); + } + + if (copied == len) + break; + } while (i != msg_rx->sg.end); + + if (unlikely(peek)) { + msg_rx = sk_psock_next_msg(psock, msg_rx); + if (!msg_rx) + break; + continue; + } + + msg_rx->sg.start = i; + if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + msg_rx = sk_psock_dequeue_msg(psock); + kfree_sk_msg(msg_rx); + } + msg_rx = sk_psock_peek_msg(psock); + } + + return copied; +} +EXPORT_SYMBOL_GPL(sk_msg_recvmsg); + static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { @@@ -508,7 -410,7 +508,7 @@@ if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); if (unlikely(!msg)) return NULL;
@@@ -586,6 -488,7 +586,7 @@@ static int sk_psock_skb_ingress_self(st if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); + skb_set_owner_r(skb, sk); return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); }
@@@ -595,7 -498,7 +596,7 @@@ static int sk_psock_handle_skb(struct s if (!ingress) { if (!sock_writeable(psock->sk)) return -EAGAIN; - return skb_send_sock_locked(psock->sk, skb, off, len); + return skb_send_sock(psock->sk, skb, off, len); } return sk_psock_skb_ingress(psock, skb); } @@@ -609,7 -512,8 +610,7 @@@ static void sk_psock_backlog(struct wor u32 len, off; int ret;
- /* Lock sock to avoid losing sk_socket during loop. */ - lock_sock(psock->sk); + mutex_lock(&psock->work_mutex); if (state->skb) { skb = state->skb; len = state->len; @@@ -622,11 -526,10 +623,11 @@@ len = skb->len; off = 0; start: - ingress = tcp_skb_bpf_ingress(skb); + ingress = skb_bpf_ingress(skb); + skb_bpf_redirect_clear(skb); do { ret = -EIO; - if (likely(psock->sk->sk_socket)) + if (!sock_flag(psock->sk, SOCK_DEAD)) ret = sk_psock_handle_skb(psock, skb, off, len, ingress); if (ret <= 0) { @@@ -650,7 -553,7 +651,7 @@@ kfree_skb(skb); } end: - release_sock(psock->sk); + mutex_unlock(&psock->work_mutex); }
struct sk_psock *sk_psock_init(struct sock *sk, int node) @@@ -660,6 -563,11 +661,6 @@@
write_lock_bh(&sk->sk_callback_lock);
- if (inet_csk_has_ulp(sk)) { - psock = ERR_PTR(-EINVAL); - goto out; - } - if (sk->sk_user_data) { psock = ERR_PTR(-EBUSY); goto out; @@@ -683,9 -591,7 +684,9 @@@ spin_lock_init(&psock->link_lock);
INIT_WORK(&psock->work, sk_psock_backlog); + mutex_init(&psock->work_mutex); INIT_LIST_HEAD(&psock->ingress_msg); + spin_lock_init(&psock->ingress_lock); skb_queue_head_init(&psock->ingress_skb);
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); @@@ -713,7 -619,7 +714,7 @@@ struct sk_psock_link *sk_psock_link_pop return link; }
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock) +static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) { struct sk_msg *msg, *tmp;
@@@ -724,14 -630,9 +725,14 @@@ } }
-static void sk_psock_zap_ingress(struct sk_psock *psock) +static void __sk_psock_zap_ingress(struct sk_psock *psock) { - __skb_queue_purge(&psock->ingress_skb); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) { + skb_bpf_redirect_clear(skb); + kfree_skb(skb); + } __sk_psock_purge_ingress_msg(psock); }
@@@ -745,35 -646,23 +746,35 @@@ static void sk_psock_link_destroy(struc } }
-static void sk_psock_destroy_deferred(struct work_struct *gc) +void sk_psock_stop(struct sk_psock *psock, bool wait) { - struct sk_psock *psock = container_of(gc, struct sk_psock, gc); + spin_lock_bh(&psock->ingress_lock); + sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); + sk_psock_cork_free(psock); + __sk_psock_zap_ingress(psock); + spin_unlock_bh(&psock->ingress_lock);
+ if (wait) + cancel_work_sync(&psock->work); +} + +static void sk_psock_done_strp(struct sk_psock *psock); + +static void sk_psock_destroy(struct work_struct *work) +{ + struct sk_psock *psock = container_of(to_rcu_work(work), + struct sk_psock, rwork); /* No sk_callback_lock since already detached. */
- /* Parser has been stopped */ - if (psock->progs.skb_parser) - strp_done(&psock->parser.strp); + sk_psock_done_strp(psock);
cancel_work_sync(&psock->work); + mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
sk_psock_link_destroy(psock); sk_psock_cork_free(psock); - sk_psock_zap_ingress(psock);
if (psock->sk_redir) sock_put(psock->sk_redir); @@@ -781,21 -670,30 +782,21 @@@ kfree(psock); }
-static void sk_psock_destroy(struct rcu_head *rcu) -{ - struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu); - - INIT_WORK(&psock->gc, sk_psock_destroy_deferred); - schedule_work(&psock->gc); -} - void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_cork_free(psock); - sk_psock_zap_ingress(psock); + sk_psock_stop(psock, false);
write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); - if (psock->progs.skb_parser) + if (psock->progs.stream_parser) sk_psock_stop_strp(sk, psock); - else if (psock->progs.skb_verdict) + else if (psock->progs.stream_verdict || psock->progs.skb_verdict) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); - sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- call_rcu(&psock->rcu, sk_psock_destroy); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); + queue_rcu_work(system_wq, &psock->rwork); } EXPORT_SYMBOL_GPL(sk_psock_drop);
@@@ -846,12 -744,27 +847,12 @@@ out } EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
-static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog, - struct sk_buff *skb) -{ - bpf_compute_data_end_sk_skb(skb); - return bpf_prog_run_pin_on_cpu(prog, skb); -} - -static struct sk_psock *sk_psock_from_strp(struct strparser *strp) -{ - struct sk_psock_parser *parser; - - parser = container_of(strp, struct sk_psock_parser, strp); - return container_of(parser, struct sk_psock, parser); -} - static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other;
- sk_other = tcp_skb_bpf_redirect_fetch(skb); + sk_other = skb_bpf_redirect_fetch(skb); /* This error is a buggy BPF program, it returned a redirect * return code, but then didn't set a redirect interface. */ @@@ -864,27 -777,20 +865,26 @@@ * error that caused the pipe to break. We can't send a packet on * a socket that is in this state so we drop the skb. */ - if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || - !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { + if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) { + kfree_skb(skb); + return; + } + spin_lock_bh(&psock_other->ingress_lock); + if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { + spin_unlock_bh(&psock_other->ingress_lock); kfree_skb(skb); return; }
skb_queue_tail(&psock_other->ingress_skb, skb); schedule_work(&psock_other->work); + spin_unlock_bh(&psock_other->ingress_lock); }
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict) { switch (verdict) { case __SK_REDIRECT: - skb_set_owner_r(skb, sk); sk_psock_skb_redirect(skb); break; case __SK_PASS: @@@ -900,17 -806,12 +900,13 @@@ int sk_psock_tls_strp_read(struct sk_ps int ret = __SK_PASS;
rcu_read_lock(); - prog = READ_ONCE(psock->progs.skb_verdict); + prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { - /* We skip full set_owner_r here because if we do a SK_PASS - * or SK_DROP we can skip skb memory accounting and use the - * TLS context. - */ skb->sk = psock->sk; - tcp_skb_bpf_redirect_clear(skb); - ret = sk_psock_bpf_run(psock, prog, skb); - ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + skb_dst_drop(skb); + skb_bpf_redirect_clear(skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } sk_psock_tls_verdict_apply(skb, psock->sk, ret); @@@ -922,6 -823,7 +918,6 @@@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_rea static void sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, int verdict) { - struct tcp_skb_cb *tcp; struct sock *sk_other; int err = -EIO;
@@@ -933,7 -835,8 +929,7 @@@ goto out_free; }
- tcp = TCP_SKB_CB(skb); - tcp->bpf.flags |= BPF_F_INGRESS; + skb_bpf_set_ingress(skb);
/* If the queue is empty then we can submit directly * into the msg queue. If its not empty we have to @@@ -945,12 -848,8 +941,12 @@@ err = sk_psock_skb_ingress_self(psock, skb); } if (err < 0) { - skb_queue_tail(&psock->ingress_skb, skb); - schedule_work(&psock->work); + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + skb_queue_tail(&psock->ingress_skb, skb); + schedule_work(&psock->work); + } + spin_unlock_bh(&psock->ingress_lock); } break; case __SK_REDIRECT: @@@ -963,24 -862,6 +959,24 @@@ out_free } }
+static void sk_psock_write_space(struct sock *sk) +{ + struct sk_psock *psock; + void (*write_space)(struct sock *sk) = NULL; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) { + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + schedule_work(&psock->work); + write_space = psock->saved_write_space; + } + rcu_read_unlock(); + if (write_space) + write_space(sk); +} + +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { struct sk_psock *psock; @@@ -995,13 -876,13 +991,14 @@@ kfree_skb(skb); goto out; } - skb_set_owner_r(skb, sk); - prog = READ_ONCE(psock->progs.skb_verdict); + prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { + skb->sk = sk; - tcp_skb_bpf_redirect_clear(skb); - ret = sk_psock_bpf_run(psock, prog, skb); - ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + skb_dst_drop(skb); + skb_bpf_redirect_clear(skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); + skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: @@@ -1015,15 -896,15 +1012,15 @@@ static int sk_psock_strp_read_done(stru
static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock = container_of(strp, struct sk_psock, strp); struct bpf_prog *prog; int ret = skb->len;
rcu_read_lock(); - prog = READ_ONCE(psock->progs.skb_parser); + prog = READ_ONCE(psock->progs.stream_parser); if (likely(prog)) { skb->sk = psock->sk; - ret = sk_psock_bpf_run(psock, prog, skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); skb->sk = NULL; } rcu_read_unlock(); @@@ -1039,59 -920,16 +1036,59 @@@ static void sk_psock_strp_data_ready(st psock = sk_psock(sk); if (likely(psock)) { if (tls_sw_has_ctx_rx(sk)) { - psock->parser.saved_data_ready(sk); + psock->saved_data_ready(sk); } else { write_lock_bh(&sk->sk_callback_lock); - strp_data_ready(&psock->parser.strp); + strp_data_ready(&psock->strp); write_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); }
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) +{ + static const struct strp_callbacks cb = { + .rcv_msg = sk_psock_strp_read, + .read_sock_done = sk_psock_strp_read_done, + .parse_msg = sk_psock_strp_parse, + }; + + return strp_init(&psock->strp, sk, &cb); +} + +void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) +{ + if (psock->saved_data_ready) + return; + + psock->saved_data_ready = sk->sk_data_ready; + sk->sk_data_ready = sk_psock_strp_data_ready; + sk->sk_write_space = sk_psock_write_space; +} + +void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) +{ + if (!psock->saved_data_ready) + return; + + sk->sk_data_ready = psock->saved_data_ready; + psock->saved_data_ready = NULL; + strp_stop(&psock->strp); +} + +static void sk_psock_done_strp(struct sk_psock *psock) +{ + /* Parser has been stopped */ + if (psock->progs.stream_parser) + strp_done(&psock->strp); +} +#else +static void sk_psock_done_strp(struct sk_psock *psock) +{ +} +#endif /* CONFIG_BPF_STREAM_PARSER */ + static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, unsigned int offset, size_t orig_len) { @@@ -1115,15 -953,13 +1112,16 @@@ kfree_skb(skb); goto out; } - skb_set_owner_r(skb, sk); - prog = READ_ONCE(psock->progs.skb_verdict); + prog = READ_ONCE(psock->progs.stream_verdict); + if (!prog) + prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { + skb->sk = sk; - tcp_skb_bpf_redirect_clear(skb); - ret = sk_psock_bpf_run(psock, prog, skb); - ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + skb_dst_drop(skb); + skb_bpf_redirect_clear(skb); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); + skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: @@@ -1146,21 -982,82 +1144,21 @@@ static void sk_psock_verdict_data_ready sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv); }
-static void sk_psock_write_space(struct sock *sk) -{ - struct sk_psock *psock; - void (*write_space)(struct sock *sk) = NULL; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) { - if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - schedule_work(&psock->work); - write_space = psock->saved_write_space; - } - rcu_read_unlock(); - if (write_space) - write_space(sk); -} - -int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) -{ - static const struct strp_callbacks cb = { - .rcv_msg = sk_psock_strp_read, - .read_sock_done = sk_psock_strp_read_done, - .parse_msg = sk_psock_strp_parse, - }; - - psock->parser.enabled = false; - return strp_init(&psock->parser.strp, sk, &cb); -} - void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) { - struct sk_psock_parser *parser = &psock->parser; - - if (parser->enabled) + if (psock->saved_data_ready) return;
- parser->saved_data_ready = sk->sk_data_ready; + psock->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = sk_psock_verdict_data_ready; sk->sk_write_space = sk_psock_write_space; - parser->enabled = true; -} - -void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) -{ - struct sk_psock_parser *parser = &psock->parser; - - if (parser->enabled) - return; - - parser->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_strp_data_ready; - sk->sk_write_space = sk_psock_write_space; - parser->enabled = true; -} - -void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) -{ - struct sk_psock_parser *parser = &psock->parser; - - if (!parser->enabled) - return; - - sk->sk_data_ready = parser->saved_data_ready; - parser->saved_data_ready = NULL; - strp_stop(&parser->strp); - parser->enabled = false; }
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) { - struct sk_psock_parser *parser = &psock->parser; - - if (!parser->enabled) + if (!psock->saved_data_ready) return;
- sk->sk_data_ready = parser->saved_data_ready; - parser->saved_data_ready = NULL; - parser->enabled = false; + sk->sk_data_ready = psock->saved_data_ready; + psock->saved_data_ready = NULL; } diff --combined net/ethtool/ioctl.c index 26b3e7086075,771688e1b0da..a9f67574148f --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@@ -426,29 -426,13 +426,13 @@@ struct ethtool_link_usettings int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { - const struct link_mode_info *link_info; - int err; - ASSERT_RTNL();
if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP;
memset(link_ksettings, 0, sizeof(*link_ksettings)); - - link_ksettings->link_mode = -1; - err = dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); - if (err) - return err; - - if (link_ksettings->link_mode != -1) { - link_info = &link_mode_params[link_ksettings->link_mode]; - link_ksettings->base.speed = link_info->speed; - link_ksettings->lanes = link_info->lanes; - link_ksettings->base.duplex = link_info->duplex; - } - - return 0; + return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); } EXPORT_SYMBOL(__ethtool_get_link_ksettings);
@@@ -1844,18 -1828,6 +1828,18 @@@ out return ret; }
+__printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(*data, ETH_GSTRING_LEN, fmt, args); + va_end(args); + + *data += ETH_GSTRING_LEN; +} +EXPORT_SYMBOL(ethtool_sprintf); + static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; @@@ -2568,9 -2540,6 +2552,9 @@@ static int ethtool_get_fecparam(struct if (rc) return rc;
+ if (WARN_ON_ONCE(fecparam.reserved)) + fecparam.reserved = 0; + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) return -EFAULT; return 0; @@@ -2586,12 -2555,6 +2570,12 @@@ static int ethtool_set_fecparam(struct if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) return -EFAULT;
+ if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE) + return -EINVAL; + + fecparam.active_fec = 0; + fecparam.reserved = 0; + return dev->ethtool_ops->set_fecparam(dev, &fecparam); }
diff --combined net/ipv4/esp4.c index 1ae920b93f39,4b834bbf95e0..dd1c752ea122 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@@ -279,7 -279,7 +279,7 @@@ static void esp_output_done(struct cryp x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } }
@@@ -309,7 -309,7 +309,7 @@@ static struct ip_esp_hdr *esp_output_se struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { @@@ -854,7 -854,7 +854,7 @@@ static void esp_input_set_header(struc struct ip_esp_hdr *esph;
/* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { diff --combined net/ipv4/tcp_bpf.c index 3d622a0d0753,bc7d2a586e18..4f49c12dae53 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@@ -10,6 -10,86 +10,6 @@@ #include <net/inet_common.h> #include <net/tls.h>
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, - struct msghdr *msg, int len, int flags) -{ - struct iov_iter *iter = &msg->msg_iter; - int peek = flags & MSG_PEEK; - struct sk_msg *msg_rx; - int i, copied = 0; - - msg_rx = list_first_entry_or_null(&psock->ingress_msg, - struct sk_msg, list); - - while (copied != len) { - struct scatterlist *sge; - - if (unlikely(!msg_rx)) - break; - - i = msg_rx->sg.start; - do { - struct page *page; - int copy; - - sge = sk_msg_elem(msg_rx, i); - copy = sge->length; - page = sg_page(sge); - if (copied + copy > len) - copy = len - copied; - copy = copy_page_to_iter(page, sge->offset, copy, iter); - if (!copy) - return copied ? copied : -EFAULT; - - copied += copy; - if (likely(!peek)) { - sge->offset += copy; - sge->length -= copy; - if (!msg_rx->skb) - sk_mem_uncharge(sk, copy); - msg_rx->sg.size -= copy; - - if (!sge->length) { - sk_msg_iter_var_next(i); - if (!msg_rx->skb) - put_page(page); - } - } else { - /* Lets not optimize peek case if copy_page_to_iter - * didn't copy the entire length lets just break. - */ - if (copy != sge->length) - return copied; - sk_msg_iter_var_next(i); - } - - if (copied == len) - break; - } while (i != msg_rx->sg.end); - - if (unlikely(peek)) { - if (msg_rx == list_last_entry(&psock->ingress_msg, - struct sk_msg, list)) - break; - msg_rx = list_next_entry(msg_rx, list); - continue; - } - - msg_rx->sg.start = i; - if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { - list_del(&msg_rx->list); - if (msg_rx->skb) - consume_skb(msg_rx->skb); - kfree(msg_rx); - } - msg_rx = list_first_entry_or_null(&psock->ingress_msg, - struct sk_msg, list); - } - - return copied; -} -EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg); - static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg, u32 apply_bytes, int flags) { @@@ -149,7 -229,7 +149,7 @@@ int tcp_bpf_sendmsg_redir(struct sock * } EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
-#ifdef CONFIG_BPF_STREAM_PARSER +#ifdef CONFIG_BPF_SYSCALL static bool tcp_bpf_stream_read(const struct sock *sk) { struct sk_psock *psock; @@@ -163,6 -243,28 +163,6 @@@ return !empty; }
-static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, - int flags, long timeo, int *err) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int ret = 0; - - if (sk->sk_shutdown & RCV_SHUTDOWN) - return 1; - - if (!timeo) - return ret; - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - ret = sk_wait_event(sk, &timeo, - !list_empty(&psock->ingress_msg) || - !skb_queue_empty(&sk->sk_receive_queue), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - return ret; -} - static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@@ -182,13 -284,13 +182,13 @@@ } lock_sock(sk); msg_bytes_ready: - copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); if (!copied) { int data, err = 0; long timeo;
timeo = sock_rcvtimeo(sk, nonblock); - data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); + data = sk_msg_wait_data(sk, psock, flags, timeo, &err); if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; @@@ -499,38 -601,20 +499,44 @@@ static int tcp_bpf_assert_proto_ops(str ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; }
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) +int tcp_bpf_update_proto(struct sock *sk, bool restore) { + struct sk_psock *psock = sk_psock(sk); int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
+ if (restore) { + if (inet_csk_has_ulp(sk)) { ++ /* TLS does not have an unhash proto in SW cases, ++ * but we need to ensure we stop using the sock_map ++ * unhash routine because the associated psock is being ++ * removed. So use the original unhash handler. ++ */ ++ WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + } else { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + } + return 0; + } + + if (inet_csk_has_ulp(sk)) + return -EINVAL; + if (sk->sk_family == AF_INET6) { if (tcp_bpf_assert_proto_ops(psock->sk_proto)) - return ERR_PTR(-EINVAL); + return -EINVAL;
tcp_bpf_check_v6_needs_rebuild(psock->sk_proto); }
- return &tcp_bpf_prots[family][config]; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); + return 0; } +EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
/* If a child got cloned from a listening socket that had tcp_bpf * protocol callbacks installed, we need to restore the callbacks to @@@ -545,4 -629,4 +551,4 @@@ void tcp_bpf_clone(const struct sock *s if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) newsk->sk_prot = sk->sk_prot_creator; } -#endif /* CONFIG_BPF_STREAM_PARSER */ +#endif /* CONFIG_BPF_SYSCALL */ diff --combined net/ipv4/udp.c index bfcc7f1a8a7f,99d743eb9dc4..15f5504adf5b --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@@ -1782,35 -1782,6 +1782,35 @@@ busy_check } EXPORT_SYMBOL(__skb_recv_udp);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + int copied = 0; + + while (1) { + struct sk_buff *skb; + int err, used; + + skb = skb_recv_udp(sk, 0, 1, &err); + if (!skb) + return err; + used = recv_actor(desc, skb, 0, skb->len); + if (used <= 0) { + if (!copied) + copied = used; + break; + } else if (used <= skb->len) { + copied += used; + } + + if (!desc->count) + break; + } + + return copied; +} +EXPORT_SYMBOL(udp_read_sock); + /* * This should be easy, if there is something there we * return it, otherwise we block. @@@ -2207,8 -2178,6 +2207,8 @@@ static int udp_queue_rcv_skb(struct soc segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + + udp_post_segment_fix_csum(skb); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); @@@ -2695,12 -2664,9 +2695,12 @@@ int udp_lib_setsockopt(struct sock *sk
case UDP_GRO: lock_sock(sk); + + /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk->sk_socket); up->gro_enabled = valbool; + up->accept_udp_l4 = valbool; release_sock(sk); break;
@@@ -2788,6 -2754,10 +2788,10 @@@ int udp_lib_getsockopt(struct sock *sk val = up->gso_size; break;
+ case UDP_GRO: + val = up->gro_enabled; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: @@@ -2883,9 -2853,6 +2887,9 @@@ struct proto udp_prot = .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = udp_bpf_update_proto, +#endif .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), diff --combined net/ipv6/addrconf.c index 120073ffb666,a9e53f5942fa..dbb5bb9269bb --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@@ -2358,7 -2358,7 +2358,7 @@@ regen /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1: * check if generated address is not inappropriate: * - * - Reserved IPv6 Interface Identifers + * - Reserved IPv6 Interface Identifiers * - XXX: already assigned to an address on the device */
@@@ -5107,20 -5107,17 +5107,20 @@@ next break; } case MULTICAST_ADDR: + read_unlock_bh(&idev->lock); fillargs->event = RTM_GETMULTICAST;
/* multicast address */ - for (ifmca = idev->mc_list; ifmca; - ifmca = ifmca->next, ip_idx++) { + for (ifmca = rcu_dereference(idev->mc_list); + ifmca; + ifmca = rcu_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) break; } + read_lock_bh(&idev->lock); break; case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; @@@ -5672,7 -5669,8 +5672,8 @@@ static int inet6_fill_link_af(struct sk return 0; }
- static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) + static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, + struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct net_device *dev = idev->dev; @@@ -5683,12 -5681,29 +5684,29 @@@
if (!token) return -EINVAL; - if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + + if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG_MOD(extack, "Device is loopback"); return -EINVAL; - if (!ipv6_accept_ra(idev)) + } + + if (dev->flags & IFF_NOARP) { + NL_SET_ERR_MSG_MOD(extack, + "Device does not do neighbour discovery"); + return -EINVAL; + } + + if (!ipv6_accept_ra(idev)) { + NL_SET_ERR_MSG_MOD(extack, + "Router advertisement is disabled on device"); return -EINVAL; - if (idev->cnf.rtr_solicits == 0) + } + + if (idev->cnf.rtr_solicits == 0) { + NL_SET_ERR_MSG(extack, + "Router solicitation is disabled on device"); return -EINVAL; + }
write_lock_bh(&idev->lock);
@@@ -5796,7 -5811,8 +5814,8 @@@ static int inet6_validate_link_af(cons return 0; }
- static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); struct nlattr *tb[IFLA_INET6_MAX + 1]; @@@ -5809,7 -5825,8 +5828,8 @@@ BUG();
if (tb[IFLA_INET6_TOKEN]) { - err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), + extack); if (err) return err; } @@@ -6096,8 -6113,10 +6116,8 @@@ static void __ipv6_ifa_notify(int event
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - rcu_read_lock_bh(); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - rcu_read_unlock_bh(); }
#ifdef CONFIG_SYSCTL diff --combined net/ipv6/ip6_vti.c index 856e46ad0895,e0cc32e45880..2d048e21abbb --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@@ -193,6 -193,7 +193,6 @@@ static int vti6_tnl_create2(struct net_
strcpy(t->parms.name, dev->name);
- dev_hold(dev); vti6_tnl_link(ip6n, t);
return 0; @@@ -493,7 -494,7 +493,7 @@@ vti6_xmit(struct sk_buff *skb, struct n }
if (dst->flags & DST_XFRM_QUEUE) - goto queued; + goto xmit;
x = dst->xfrm; if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) @@@ -522,6 -523,8 +522,8 @@@
icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { + if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } @@@ -530,7 -533,7 +532,7 @@@ goto tx_err_dst_release; }
- queued: + xmit: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; @@@ -931,7 -934,6 +933,7 @@@ static inline int vti6_dev_init_gen(str dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + dev_hold(dev); return 0; }
@@@ -963,6 -965,7 +965,6 @@@ static int __net_init vti6_fb_tnl_dev_i struct vti6_net *ip6n = net_generic(net, vti6_net_id);
t->parms.proto = IPPROTO_IPV6; - dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --combined net/ipv6/route.c index 28801ae80548,373d48073106..a22822bdbf39 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@@ -2085,10 -2085,13 +2085,10 @@@ static void rt6_age_examine_exception(s
if (rt->rt6i_flags & RTF_GATEWAY) { struct neighbour *neigh; - __u8 neigh_flags = 0;
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); - if (neigh) - neigh_flags = neigh->flags;
- if (!(neigh_flags & NTF_ROUTER)) { + if (!(neigh && (neigh->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); rt6_remove_exception(bucket, rt6_ex); @@@ -2357,7 -2360,7 +2357,7 @@@ u32 rt6_multipath_hash(const struct ne
memset(&hash_keys, 0, sizeof(hash_keys));
- if (!flkeys) { + if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); flkeys = &keys; } @@@ -2497,20 -2500,20 +2497,20 @@@ struct dst_entry *ip6_route_output_flag struct flowi6 *fl6, int flags) { - struct dst_entry *dst; - struct rt6_info *rt6; + struct dst_entry *dst; + struct rt6_info *rt6;
- rcu_read_lock(); - dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; - /* For dst cached in uncached_list, refcnt is already taken. */ - if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { - dst = &net->ipv6.ip6_null_entry->dst; - dst_hold(dst); - } - rcu_read_unlock(); + rcu_read_lock(); + dst = ip6_route_output_flags_noref(net, sk, fl6, flags); + rt6 = (struct rt6_info *)dst; + /* For dst cached in uncached_list, refcnt is already taken. */ + if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + } + rcu_read_unlock();
- return dst; + return dst; } EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@@ -5206,9 -5209,11 +5206,11 @@@ static int ip6_route_multipath_add(stru * nexthops have been replaced by first new, the rest should * be added to it. */ - cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | - NLM_F_REPLACE); - cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; + if (cfg->fc_nlinfo.nlh) { + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | + NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; + } nhn++; }
@@@ -6072,7 -6077,7 +6074,7 @@@ void fib6_info_hw_flags_set(struct net
if (!rcu_access_pointer(f6i->fib6_node)) /* The route was removed from the tree, do not send - * notfication. + * notification. */ return;
diff --combined net/mac80211/cfg.c index a0a11624a5be,860bc35383d5..7a99892e5aba --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@@ -1486,7 -1486,7 +1486,7 @@@ static int sta_apply_parameters(struct sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
/* auth flags will be set later for TDLS, - * and for unassociated stations that move to assocaited */ + * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { @@@ -1788,8 -1788,10 +1788,10 @@@ static int ieee80211_change_station(str }
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) + sta->sdata->u.vlan.sta) { + ieee80211_clear_fast_rx(sta); RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); + }
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); diff --combined net/mptcp/protocol.c index e894345d10c1,4bde960e19dc..8009b3f8e4c1 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@@ -11,7 -11,6 +11,6 @@@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> - #include <linux/igmp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@@ -20,7 -19,6 +19,6 @@@ #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> - #include <net/addrconf.h> #endif #include <net/mptcp.h> #include <net/xfrm.h> @@@ -493,7 -491,7 +491,7 @@@ static bool mptcp_check_data_fin(struc u64 rcv_data_fin_seq; bool ret = false;
- if (__mptcp_check_fallback(msk) || !msk->first) + if (__mptcp_check_fallback(msk)) return ret;
/* Need to ack a DATA_FIN received from a peer while this side @@@ -2047,21 -2045,28 +2045,21 @@@ out_err return copied; }
-static void mptcp_retransmit_handler(struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - set_bit(MPTCP_WORK_RTX, &msk->flags); - mptcp_schedule_work(sk); -} - static void mptcp_retransmit_timer(struct timer_list *t) { struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk);
bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - mptcp_retransmit_handler(sk); + /* we need a process context to retransmit */ + if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags)) + mptcp_schedule_work(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, - &sk->sk_tsq_flags)) - sock_hold(sk); + set_bit(MPTCP_RETRANSMIT, &msk->flags); } bh_unlock_sock(sk); sock_put(sk); @@@ -2871,6 -2876,48 +2869,48 @@@ static int mptcp_setsockopt_v6(struct m return ret; }
+ static bool mptcp_unsupported(int level, int optname) + { + if (level == SOL_IP) { + switch (optname) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_UNBLOCK_SOURCE: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + if (level == SOL_IPV6) { + switch (optname) { + case IPV6_ADDRFORM: + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + return false; + } + static int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@@ -2879,6 -2926,9 +2919,9 @@@
pr_debug("msk=%p", msk);
+ if (mptcp_unsupported(level, optname)) + return -ENOPROTOOPT; + if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
@@@ -2951,16 -3001,17 +2994,16 @@@ void __mptcp_check_push(struct sock *sk } }
-#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) - /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) { - unsigned long flags, nflags; - for (;;) { - flags = 0; + unsigned long flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) flags |= BIT(MPTCP_PUSH_PENDING); + if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_RETRANSMIT); if (!flags) break;
@@@ -2975,8 -3026,6 +3018,8 @@@ spin_unlock_bh(&sk->sk_lock.slock); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_RETRANSMIT)) + __mptcp_retrans(sk);
cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@@ -2992,6 -3041,20 +3035,6 @@@ */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); - - do { - flags = sk->sk_tsq_flags; - if (!(flags & MPTCP_DEFERRED_ALL)) - return; - nflags = flags & ~MPTCP_DEFERRED_ALL; - } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); - - sock_release_ownership(sk); - - if (flags & TCPF_WRITE_TIMER_DEFERRED) { - mptcp_retransmit_handler(sk); - __sock_put(sk); - } }
void mptcp_subflow_process_delegated(struct sock *ssk) @@@ -3090,18 -3153,14 +3133,18 @@@ bool mptcp_finish_join(struct sock *ssk pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */ - if (!mptcp_is_fully_established(parent)) + if (!mptcp_is_fully_established(parent)) { + subflow->reset_reason = MPTCP_RST_EMPTCP; return false; + }
if (!msk->pm.server_side) goto out;
- if (!mptcp_pm_allow_new_subflow(msk)) + if (!mptcp_pm_allow_new_subflow(msk)) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + }
/* active connections are already on conn_list, and we can't acquire * msk lock here. @@@ -3115,10 -3174,8 +3158,10 @@@ sock_hold(ssk); } spin_unlock_bh(&msk->join_list_lock); - if (!ret) + if (!ret) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + }
/* attach to msk socket only after we are sure he will deal with us * at close time @@@ -3230,12 -3287,8 +3273,12 @@@ static int mptcp_stream_connect(struct if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) mptcp_subflow_early_fallback(msk, subflow); #endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) + if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { + MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); + } + if (likely(!__mptcp_check_fallback(msk))) + MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); @@@ -3409,34 -3462,10 +3452,10 @@@ static __poll_t mptcp_poll(struct file return mask; }
- static int mptcp_release(struct socket *sock) - { - struct mptcp_subflow_context *subflow; - struct sock *sk = sock->sk; - struct mptcp_sock *msk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - } - - release_sock(sk); - - return inet_release(sock); - } - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, - .release = mptcp_release, + .release = inet_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, @@@ -3528,35 -3557,10 +3547,10 @@@ void __init mptcp_proto_init(void }
#if IS_ENABLED(CONFIG_MPTCP_IPV6) - static int mptcp6_release(struct socket *sock) - { - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk; - struct sock *sk = sock->sk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - ipv6_sock_mc_close(ssk); - ipv6_sock_ac_close(ssk); - } - - release_sock(sk); - return inet6_release(sock); - } - static const struct proto_ops mptcp_v6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, - .release = mptcp6_release, + .release = inet6_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, diff --combined net/openvswitch/conntrack.c index c29b0ef1fc27,d217bd91176b..cadb6a29b285 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@@ -809,7 -809,8 +809,7 @@@ static int ovs_ct_nat_execute(struct sk
err = nf_nat_packet(ct, ctinfo, hooknum, skb); push: - skb_push(skb, nh_off); - skb_postpush_rcsum(skb, skb->data, nh_off); + skb_push_rcsum(skb, nh_off);
return err; } @@@ -1321,7 -1322,8 +1321,7 @@@ int ovs_ct_execute(struct net *net, str else err = ovs_ct_lookup(net, key, info, skb);
- skb_push(skb, nh_ofs); - skb_postpush_rcsum(skb, skb->data, nh_ofs); + skb_push_rcsum(skb, nh_ofs); if (err) kfree_skb(skb); return err; @@@ -2032,10 -2034,10 +2032,10 @@@ static int ovs_ct_limit_del_zone_limit( static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info, struct sk_buff *reply) { - struct ovs_zone_limit zone_limit; - - zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE; - zone_limit.limit = info->default_limit; + struct ovs_zone_limit zone_limit = { + .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, + .limit = info->default_limit, + };
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); } diff --combined net/qrtr/qrtr.c index 4b46c69e14ab,1e4fb568fa84..c0477bec09bd --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@@ -20,8 -20,6 +20,8 @@@ /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff +#define QRTR_EPH_PORT_RANGE \ + XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
/** * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 @@@ -108,7 -106,8 +108,7 @@@ static LIST_HEAD(qrtr_all_nodes) static DEFINE_MUTEX(qrtr_node_lock);
/* local port allocation management */ -static DEFINE_IDR(qrtr_ports); -static DEFINE_MUTEX(qrtr_port_lock); +static DEFINE_XARRAY_ALLOC(qrtr_ports);
/** * struct qrtr_node - endpoint node @@@ -272,7 -271,10 +272,10 @@@ static int qrtr_tx_wait(struct qrtr_nod flow = kzalloc(sizeof(*flow), GFP_KERNEL); if (flow) { init_waitqueue_head(&flow->resume_tx); - radix_tree_insert(&node->qrtr_tx_flow, key, flow); + if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { + kfree(flow); + flow = NULL; + } } } mutex_unlock(&node->qrtr_tx_lock); @@@ -654,7 -656,7 +657,7 @@@ static struct qrtr_sock *qrtr_port_look port = 0;
rcu_read_lock(); - ipc = idr_find(&qrtr_ports, port); + ipc = xa_load(&qrtr_ports, port); if (ipc) sock_hold(&ipc->sk); rcu_read_unlock(); @@@ -696,7 -698,9 +699,7 @@@ static void qrtr_port_remove(struct qrt
__sock_put(&ipc->sk);
- mutex_lock(&qrtr_port_lock); - idr_remove(&qrtr_ports, port); - mutex_unlock(&qrtr_port_lock); + xa_erase(&qrtr_ports, port);
/* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ @@@ -715,20 -719,29 +718,20 @@@ */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { - u32 min_port; int rc;
- mutex_lock(&qrtr_port_lock); if (!*port) { - min_port = QRTR_MIN_EPH_SOCKET; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE, + GFP_KERNEL); } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { - min_port = 0; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC); + rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL); } else { - min_port = *port; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL); } - mutex_unlock(&qrtr_port_lock);
- if (rc == -ENOSPC) + if (rc == -EBUSY) return -EADDRINUSE; else if (rc < 0) return rc; @@@ -742,16 -755,20 +745,16 @@@ static void qrtr_reset_ports(void) { struct qrtr_sock *ipc; - int id; - - mutex_lock(&qrtr_port_lock); - idr_for_each_entry(&qrtr_ports, ipc, id) { - /* Don't reset control port */ - if (id == 0) - continue; + unsigned long index;
+ rcu_read_lock(); + xa_for_each_start(&qrtr_ports, index, ipc, 1) { sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; ipc->sk.sk_error_report(&ipc->sk); sock_put(&ipc->sk); } - mutex_unlock(&qrtr_port_lock); + rcu_read_unlock(); }
/* Bind socket to address. diff --combined net/rds/send.c index 53444397de66,fe5264b9d4b3..ee7214ea0fdb --- a/net/rds/send.c +++ b/net/rds/send.c @@@ -665,7 -665,7 +665,7 @@@ static void rds_send_remove_from_sock(s unlock_and_drop: spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); - if (was_on_sock) + if (was_on_sock && rm) rds_message_put(rm); }
@@@ -1225,7 -1225,7 +1225,7 @@@ int rds_sendmsg(struct socket *sock, st } /* If the socket is already bound to a link local address, * it can only send to peers on the same link. But allow - * communicating beween link local and non-link local address. + * communicating between link local and non-link local address. */ if (scope_id != rs->rs_bound_scope_id) { if (!scope_id) { diff --combined net/sched/cls_api.c index d3db70865d66,340d5af86e87..40fbea626dfd --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@@ -646,7 -646,7 +646,7 @@@ static void tc_block_indr_cleanup(struc struct net_device *dev = block_cb->indr.dev; struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; - struct flow_block_offload bo; + struct flow_block_offload bo = {};
tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, @@@ -3040,6 -3040,7 +3040,7 @@@ int tcf_exts_validate(struct net *net, { #ifdef CONFIG_NET_CLS_ACT { + int init_res[TCA_ACT_MAX_PRIO] = {}; struct tc_action *act; size_t attr_size = 0;
@@@ -3051,12 -3052,11 +3052,11 @@@ return PTR_ERR(a_o); act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, a_o, rtnl_held, - extack); - if (IS_ERR(act)) { - module_put(a_o->owner); + TCA_ACT_BIND, a_o, init_res, + rtnl_held, extack); + module_put(a_o->owner); + if (IS_ERR(act)) return PTR_ERR(act); - }
act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; @@@ -3067,8 -3067,8 +3067,8 @@@
err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, &attr_size, - rtnl_held, extack); + exts->actions, init_res, + &attr_size, rtnl_held, extack); if (err < 0) return err; exts->nr_actions = err; @@@ -3662,9 -3662,6 +3662,9 @@@ int tc_setup_flow_action(struct flow_ac entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.burst_pkt = tcf_police_burst_pkt(act); + entry->police.rate_pkt_ps = + tcf_police_rate_pkt_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); entry->police.index = act->tcfa_index; } else if (is_tcf_ct(act)) { diff --combined net/tipc/crypto.c index 76b8428c94a7,97710ce36047..e5c43d4d5a75 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@@ -317,7 -317,7 +317,7 @@@ static int tipc_aead_key_generate(struc
#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \ do { \ - typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr), \ + struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \ lockdep_is_held(lock)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ tipc_aead_put(__tmp); \ @@@ -798,7 -798,7 +798,7 @@@ static int tipc_aead_encrypt(struct tip ehdr = (struct tipc_ehdr *)skb->data; salt = aead->salt; if (aead->mode == CLUSTER_KEY) - salt ^= ehdr->addr; /* __be32 */ + salt ^= __be32_to_cpu(ehdr->addr); else if (__dnode) salt ^= tipc_node_get_addr(__dnode); memcpy(iv, &salt, 4); @@@ -929,7 -929,7 +929,7 @@@ static int tipc_aead_decrypt(struct ne ehdr = (struct tipc_ehdr *)skb->data; salt = aead->salt; if (aead->mode == CLUSTER_KEY) - salt ^= ehdr->addr; /* __be32 */ + salt ^= __be32_to_cpu(ehdr->addr); else if (ehdr->destined) salt ^= tipc_own_addr(net); memcpy(iv, &salt, 4); @@@ -1492,8 -1492,6 +1492,8 @@@ int tipc_crypto_start(struct tipc_crypt /* Allocate statistic structure */ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { + if (c->wq) + destroy_workqueue(c->wq); kfree_sensitive(c); return -ENOMEM; } @@@ -1943,21 -1941,22 +1943,22 @@@ static void tipc_crypto_rcv_complete(st goto rcv; if (tipc_aead_clone(&tmp, aead) < 0) goto rcv; + WARN_ON(!refcount_inc_not_zero(&tmp->refcnt)); if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) { tipc_aead_free(&tmp->rcu); goto rcv; } tipc_aead_put(aead); - aead = tipc_aead_get((struct tipc_aead __force __rcu *)tmp); + aead = tmp; }
if (unlikely(err)) { - tipc_aead_users_dec(aead, INT_MIN); + tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN); goto free_skb; }
/* Set the RX key's user */ - tipc_aead_users_set(aead, 1); + tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
/* Mark this point, RX works */ rx->timer1 = jiffies; diff --combined net/tipc/net.c index 3f927949bb23,faf6bf554514..a130195af188 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@@ -89,7 -89,7 +89,7 @@@ * - A spin lock to protect the registry of kernel/driver users (reg.c) * - A global spin_lock (tipc_port_lock), which only task is to ensure * consistency where more than one port is involved in an operation, - * i.e., whe a port is part of a linked list of ports. + * i.e., when a port is part of a linked list of ports. * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * @@@ -125,11 -125,6 +125,11 @@@ int tipc_net_init(struct net *net, u8 * static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); + struct tipc_socket_addr sk = {0, addr}; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + TIPC_NODE_STATE, addr, addr);
if (cmpxchg(&tn->node_addr, 0, addr)) return; @@@ -137,7 -132,8 +137,7 @@@ tipc_named_reinit(net); tipc_sk_reinit(net); tipc_mon_reinit_self(net); - tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr, - TIPC_CLUSTER_SCOPE, 0, addr); + tipc_nametbl_publish(net, &ua, &sk, addr); }
void tipc_net_finalize_work(struct work_struct *work) diff --combined net/tipc/node.c index 707d0dc71fad,e0ee83263a39..8217905348f4 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@@ -372,49 -372,42 +372,49 @@@ static struct tipc_node *tipc_node_find }
static void tipc_node_read_lock(struct tipc_node *n) + __acquires(n->lock) { read_lock_bh(&n->lock); }
static void tipc_node_read_unlock(struct tipc_node *n) + __releases(n->lock) { read_unlock_bh(&n->lock); }
static void tipc_node_write_lock(struct tipc_node *n) + __acquires(n->lock) { write_lock_bh(&n->lock); }
static void tipc_node_write_unlock_fast(struct tipc_node *n) + __releases(n->lock) { write_unlock_bh(&n->lock); }
static void tipc_node_write_unlock(struct tipc_node *n) + __releases(n->lock) { + struct tipc_socket_addr sk; struct net *net = n->net; - u32 addr = 0; u32 flags = n->action_flags; - u32 link_id = 0; - u32 bearer_id; struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id;
if (likely(!flags)) { write_unlock_bh(&n->lock); return; }
- addr = n->addr; - link_id = n->link_id; - bearer_id = link_id & 0xffff; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = n->addr; + bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@@ -423,18 -416,20 +423,18 @@@ write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr, n->capabilities); + tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr, n->capabilities); + tipc_named_node_up(net, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, addr, bearer_id); - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, link_id); + tipc_mon_peer_up(net, n->addr, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, n->link_id); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, addr, bearer_id); - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - addr, link_id); + tipc_mon_peer_down(net, n->addr, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); } }
@@@ -1739,7 -1734,7 +1739,7 @@@ int tipc_node_xmit(struct net *net, str }
/* tipc_node_xmit_skb(): send single buffer to destination - * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected * The only exception is datagram messages rerouted after secondary * lookup, which are rare and safe to dispose of anyway. @@@ -2014,7 -2009,7 +2014,7 @@@ static bool tipc_node_check_state(struc return true; }
- /* No synching needed if only one link */ + /* No syncing needed if only one link */ if (!pl || !tipc_link_is_up(pl)) return true;
diff --combined net/tipc/socket.c index f21162aa0cf7,022999e0202d..58935cd0d068 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@@ -3,7 -3,7 +3,7 @@@ * * Copyright (c) 2001-2007, 2012-2019, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@@ -111,6 -111,7 +111,6 @@@ struct tipc_sock struct sock sk; u32 conn_type; u32 conn_instance; - int published; u32 max_pkt; u32 maxnagle; u32 portid; @@@ -140,7 -141,6 +140,7 @@@ bool expect_ack; bool nodelay; bool group_is_open; + bool published; };
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@@ -151,8 -151,10 +151,8 @@@ static int tipc_release(struct socket * static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); static void tipc_sk_timeout(struct timer_list *t); -static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq); -static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq); +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); @@@ -642,7 -644,7 +642,7 @@@ static int tipc_release(struct socket * __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); - tipc_sk_withdraw(tsk, 0, NULL); + tipc_sk_withdraw(tsk, NULL); __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@@ -675,31 -677,22 +675,31 @@@ */ static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + bool unbind = false;
if (unlikely(!alen)) - return tipc_sk_withdraw(tsk, 0, NULL); + return tipc_sk_withdraw(tsk, NULL);
- if (addr->addrtype == TIPC_SERVICE_ADDR) - addr->addr.nameseq.upper = addr->addr.nameseq.lower; + if (ua->addrtype == TIPC_SERVICE_ADDR) { + ua->addrtype = TIPC_SERVICE_RANGE; + ua->sr.upper = ua->sr.lower; + } + if (ua->scope < 0) { + unbind = true; + ua->scope = -ua->scope; + } + /* Users may still use deprecated TIPC_ZONE_SCOPE */ + if (ua->scope != TIPC_NODE_SCOPE) + ua->scope = TIPC_CLUSTER_SCOPE;
if (tsk->group) return -EACCES;
- if (addr->scope >= 0) - return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq); - else - return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); + if (unbind) + return tipc_sk_withdraw(tsk, ua); + return tipc_sk_publish(tsk, ua); }
int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) @@@ -714,17 -707,18 +714,17 @@@
static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + u32 atype = ua->addrtype;
if (alen) { - if (alen < sizeof(struct sockaddr_tipc)) + if (!tipc_uaddr_valid(ua, alen)) return -EINVAL; - if (addr->family != AF_TIPC) + if (atype == TIPC_SOCKET_ADDR) return -EAFNOSUPPORT; - if (addr->addrtype > TIPC_SERVICE_ADDR) - return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) { + if (ua->sr.type < TIPC_RESERVED_TYPES) { pr_warn_once("Can't bind to reserved service type %u\n", - addr->addr.nameseq.type); + ua->sr.type); return -EACCES; } } @@@ -832,7 -826,7 +832,7 @@@ static __poll_t tipc_poll(struct file * /** * tipc_sendmcast - send multicast message * @sock: socket structure - * @seq: destination address + * @ua: destination address struct * @msg: message to send * @dlen: length of data to send * @timeout: timeout to wait for wakeup @@@ -840,7 -834,7 +840,7 @@@ * Called from function tipc_sendmsg(), which has done all sanity checks * Return: the number of bytes sent on success, or errno */ -static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, +static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua, struct msghdr *msg, size_t dlen, long timeout) { struct sock *sk = sock->sk; @@@ -848,6 -842,7 +848,6 @@@ struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); - struct tipc_mc_method *method = &tsk->mc_method; struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@@ -862,7 -857,8 +862,7 @@@
/* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); - tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, &dsts); + tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH;
@@@ -872,9 -868,9 +872,9 @@@ msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); + msg_set_nametype(hdr, ua->sr.type); + msg_set_namelower(hdr, ua->sr.lower); + msg_set_nameupper(hdr, ua->sr.upper);
/* Build message as chain of buffers */ __skb_queue_head_init(&pkts); @@@ -884,7 -880,7 +884,7 @@@ if (unlikely(rc == dlen)) { trace_tipc_sk_sendmcast(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_mcast_xmit(net, &pkts, method, &dsts, + rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts, &tsk->cong_link_cnt); }
@@@ -958,7 -954,7 +958,7 @@@ static int tipc_send_group_unicast(stru int dlen, long timeout) { struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); @@@ -966,8 -962,8 +966,8 @@@ u32 node, port; int rc;
- node = dest->addr.id.node; - port = dest->addr.id.ref; + node = ua->sk.node; + port = ua->sk.ref; if (!port && !node) return -EHOSTUNREACH;
@@@ -1001,7 -997,7 +1001,7 @@@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct list_head *cong_links = &tsk->cong_links; @@@ -1012,13 -1008,16 +1012,13 @@@ struct net *net = sock_net(sk); u32 node, port, exclude; struct list_head dsts; - u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong;
INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr);
while (++lookups < 4) { exclude = tipc_group_exclude(tsk->group); @@@ -1027,8 -1026,8 +1027,8 @@@
/* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, false)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, + exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); cong = tipc_group_cong(tsk->group, node, port, blks, @@@ -1083,7 -1082,7 +1083,7 @@@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); @@@ -1108,9 -1107,9 +1108,9 @@@ return -EHOSTUNREACH;
/* Complete message header */ - if (dest) { + if (ua) { msg_set_type(hdr, TIPC_GRP_MCAST_MSG); - msg_set_nameinst(hdr, dest->addr.name.name.instance); + msg_set_nameinst(hdr, ua->sa.instance); } else { msg_set_type(hdr, TIPC_GRP_BCAST_MSG); msg_set_nameinst(hdr, 0); @@@ -1157,25 -1156,29 +1157,25 @@@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 type, inst, scope, exclude; struct list_head dsts; - u32 dstcnt; + u32 dstcnt, exclude;
INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp);
- if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, true)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true)) return -EHOSTUNREACH;
if (dstcnt == 1) { - tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref); return tipc_send_group_unicast(sock, m, dlen, timeout); }
@@@ -1195,18 -1198,17 +1195,18 @@@ void tipc_sk_mcast_rcv(struct net *net struct sk_buff_head *inputq) { u32 self = tipc_own_addr(net); - u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; u32 portid, onode; struct sk_buff_head tmpq; struct list_head dports; struct tipc_msg *hdr; + struct tipc_uaddr ua; int user, mtyp, hlen; bool exact;
__skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); + ua.addrtype = TIPC_SERVICE_RANGE;
skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { @@@ -1215,7 -1217,7 +1215,7 @@@ mtyp = msg_type(hdr); hlen = skb_headroom(skb) + msg_hdr_sz(hdr); onode = msg_orignode(hdr); - type = msg_nametype(hdr); + ua.sr.type = msg_nametype(hdr);
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); @@@ -1230,23 -1232,24 +1230,23 @@@
/* Group messages require exact scope match */ if (msg_in_group(hdr)) { - lower = 0; - upper = ~0; - scope = msg_lookup_scope(hdr); + ua.sr.lower = 0; + ua.sr.upper = ~0; + ua.scope = msg_lookup_scope(hdr); exact = true; } else { /* TIPC_NODE_SCOPE means "any scope" in this context */ if (onode == self) - scope = TIPC_NODE_SCOPE; + ua.scope = TIPC_NODE_SCOPE; else - scope = TIPC_CLUSTER_SCOPE; + ua.scope = TIPC_CLUSTER_SCOPE; exact = false; - lower = msg_namelower(hdr); - upper = msg_nameupper(hdr); + ua.sr.lower = msg_namelower(hdr); + ua.sr.upper = msg_nameupper(hdr); }
/* Create destination port list: */ - tipc_nametbl_mc_lookup(net, type, lower, upper, - scope, exact, &dports); + tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
/* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { @@@ -1262,7 -1265,7 +1262,7 @@@ spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - kfree_skb(__skb_dequeue(arrvq)); + __skb_dequeue(arrvq); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); @@@ -1414,43 -1417,44 +1414,43 @@@ static int __tipc_sendmsg(struct socke struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_service_range *seq; + struct tipc_socket_addr skaddr; struct sk_buff_head pkts; - u32 dport = 0, dnode = 0; - u32 type = 0, inst = 0; - int mtu, rc; + int atype, mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE;
- if (likely(dest)) { - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - if (unlikely(dest->family != AF_TIPC)) + if (ua) { + if (!tipc_uaddr_valid(ua, m->msg_namelen)) return -EINVAL; + atype = ua->addrtype; }
+ /* If socket belongs to a communication group follow other paths */ if (grp) { - if (!dest) + if (!ua) return tipc_send_group_bcast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_SERVICE_ADDR) + if (atype == TIPC_SERVICE_ADDR) return tipc_send_group_anycast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_SOCKET_ADDR) + if (atype == TIPC_SOCKET_ADDR) return tipc_send_group_unicast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_MCAST) + if (atype == TIPC_SERVICE_RANGE) return tipc_send_group_mcast(sock, m, dlen, timeout); return -EINVAL; }
- if (unlikely(!dest)) { - dest = &tsk->peer; - if (!syn && dest->family != AF_TIPC) + if (!ua) { + ua = (struct tipc_uaddr *)&tsk->peer; + if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; + atype = ua->addrtype; }
if (unlikely(syn)) { @@@ -1460,51 -1464,54 +1460,51 @@@ return -EISCONN; if (tsk->published) return -EOPNOTSUPP; - if (dest->addrtype == TIPC_SERVICE_ADDR) { - tsk->conn_type = dest->addr.name.name.type; - tsk->conn_instance = dest->addr.name.name.instance; + if (atype == TIPC_SERVICE_ADDR) { + tsk->conn_type = ua->sa.type; + tsk->conn_instance = ua->sa.instance; } msg_set_syn(hdr, 1); }
- seq = &dest->addr.nameseq; - if (dest->addrtype == TIPC_ADDR_MCAST) - return tipc_sendmcast(sock, seq, m, dlen, timeout); - - if (dest->addrtype == TIPC_SERVICE_ADDR) { - type = dest->addr.name.name.type; - inst = dest->addr.name.name.instance; - dnode = dest->addr.name.domain; - dport = tipc_nametbl_translate(net, type, inst, &dnode); - if (unlikely(!dport && !dnode)) + /* Determine destination */ + if (atype == TIPC_SERVICE_RANGE) { + return tipc_sendmcast(sock, ua, m, dlen, timeout); + } else if (atype == TIPC_SERVICE_ADDR) { + skaddr.node = ua->lookup_node; + ua->scope = tipc_node2scope(skaddr.node); + if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_SOCKET_ADDR) { - dnode = dest->addr.id.node; + } else if (atype == TIPC_SOCKET_ADDR) { + skaddr = ua->sk; } else { return -EINVAL; }
/* Block or return if destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, - !tipc_dest_find(clinks, dnode, 0)); + !tipc_dest_find(clinks, skaddr.node, 0)); if (unlikely(rc)) return rc;
- if (dest->addrtype == TIPC_SERVICE_ADDR) { + /* Finally build message header */ + msg_set_destnode(hdr, skaddr.node); + msg_set_destport(hdr, skaddr.ref); + if (atype == TIPC_SERVICE_ADDR) { msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); - msg_set_nametype(hdr, type); - msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dport); + msg_set_nametype(hdr, ua->sa.type); + msg_set_nameinst(hdr, ua->sa.instance); + msg_set_lookup_scope(hdr, ua->scope); } else { /* TIPC_SOCKET_ADDR */ msg_set_type(hdr, TIPC_DIRECT_MSG); msg_set_lookup_scope(hdr, 0); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dest->addr.id.ref); msg_set_hdr_sz(hdr, BASIC_H_SIZE); }
+ /* Add message body */ __skb_queue_head_init(&pkts); - mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true); + mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@@ -1513,11 -1520,10 +1513,11 @@@ return -ENOMEM; }
+ /* Send message */ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - tipc_dest_push(clinks, dnode, 0); + tipc_dest_push(clinks, skaddr.node, 0); tsk->cong_link_cnt++; rc = 0; } @@@ -2885,62 -2891,66 +2885,62 @@@ static void tipc_sk_timeout(struct time sock_put(sk); }
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq) +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); - struct publication *publ; + struct tipc_socket_addr skaddr; + struct publication *p; u32 key;
- if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) return -EADDRINUSE; - - publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, - scope, tsk->portid, key); - if (unlikely(!publ)) + skaddr.ref = tsk->portid; + skaddr.node = tipc_own_addr(net); + p = tipc_nametbl_publish(net, ua, &skaddr, key); + if (unlikely(!p)) return -EINVAL;
- list_add(&publ->binding_sock, &tsk->publications); + list_add(&p->binding_sock, &tsk->publications); tsk->pub_count++; - tsk->published = 1; + tsk->published = true; return 0; }
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq) +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct net *net = sock_net(&tsk->sk); - struct publication *publ; - struct publication *safe; + struct publication *safe, *p; + struct tipc_uaddr _ua; int rc = -EINVAL;
- if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - - list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { - if (seq) { - if (publ->scope != scope) - continue; - if (publ->type != seq->type) - continue; - if (publ->lower != seq->lower) - continue; - if (publ->upper != seq->upper) - break; - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); - rc = 0; - break; + list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) { + if (!ua) { + tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope, + p->sr.type, p->sr.lower, p->sr.upper); + tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key); + continue; } - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); + /* Unbind specific publication */ + if (p->scope != ua->scope) + continue; + if (p->sr.type != ua->sr.type) + continue; + if (p->sr.lower != ua->sr.lower) + continue; + if (p->sr.upper != ua->sr.upper) + break; + tipc_nametbl_withdraw(net, ua, &p->sk, p->key); rc = 0; + break; } - if (list_empty(&tsk->publications)) + if (list_empty(&tsk->publications)) { tsk->published = 0; + rc = 0; + } return rc; }
@@@ -3057,15 -3067,13 +3057,15 @@@ static int tipc_sk_join(struct tipc_soc struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_service_range seq; + struct tipc_uaddr ua; int rc;
if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; if (mreq->scope > TIPC_NODE_SCOPE) return -EINVAL; + if (mreq->scope != TIPC_NODE_SCOPE) + mreq->scope = TIPC_CLUSTER_SCOPE; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); @@@ -3075,10 -3083,11 +3075,10 @@@ msg_set_lookup_scope(hdr, mreq->scope); msg_set_nametype(hdr, mreq->type); msg_set_dest_droppable(hdr, true); - seq.type = mreq->type; - seq.lower = mreq->instance; - seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); - rc = tipc_sk_publish(tsk, mreq->scope, &seq); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope, + mreq->type, mreq->instance, mreq->instance); + tipc_nametbl_build_group(net, grp, &ua); + rc = tipc_sk_publish(tsk, &ua); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; @@@ -3095,17 -3104,15 +3095,17 @@@ static int tipc_sk_leave(struct tipc_so { struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; - struct tipc_service_range seq; + struct tipc_uaddr ua; int scope;
if (!grp) return -EINVAL; - tipc_group_self(grp, &seq, &scope); + ua.addrtype = TIPC_SERVICE_RANGE; + tipc_group_self(grp, &ua.sr, &scope); + ua.scope = scope; tipc_group_delete(net, grp); tsk->group = NULL; - tipc_sk_withdraw(tsk, scope, &seq); + tipc_sk_withdraw(tsk, &ua); return 0; }
@@@ -3704,11 -3711,11 +3704,11 @@@ static int __tipc_nl_add_sk_publ(struc
if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper)) goto attr_msg_cancel;
nla_nest_end(skb, attrs); @@@ -3856,9 -3863,9 +3856,9 @@@ bool tipc_sk_filtering(struct sock *sk p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); if (p) { - type = p->type; - lower = p->lower; - upper = p->upper; + type = p->sr.type; + lower = p->sr.lower; + upper = p->sr.upper; } }
@@@ -3957,9 -3964,9 +3957,9 @@@ int tipc_sk_dump(struct sock *sk, u16 d if (tsk->published) { p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0); } i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); diff --combined tools/lib/bpf/xsk.c index 95da0e19f4a5,d24b5cc720ec..cea62cc3e456 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@@ -28,7 -28,6 +28,7 @@@ #include <sys/mman.h> #include <sys/socket.h> #include <sys/types.h> +#include <linux/if_link.h>
#include "bpf.h" #include "libbpf.h" @@@ -60,6 -59,8 +60,8 @@@ struct xsk_umem int fd; int refcount; struct list_head ctx_list; + bool rx_ring_setup_done; + bool tx_ring_setup_done; };
struct xsk_ctx { @@@ -71,10 -72,8 +73,10 @@@ int ifindex; struct list_head list; int prog_fd; + int link_fd; int xsks_map_fd; char ifname[IFNAMSIZ]; + bool has_bpf_link; };
struct xsk_socket { @@@ -412,7 -411,7 +414,7 @@@ static int xsk_load_xdp_prog(struct xsk static const int log_buf_size = 16 * 1024; struct xsk_ctx *ctx = xsk->ctx; char log_buf[log_buf_size]; - int err, prog_fd; + int prog_fd;
/* This is the fallback C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) @@@ -502,41 -501,14 +504,41 @@@ return prog_fd; }
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd, - xsk->config.xdp_flags); + ctx->prog_fd = prog_fd; + return 0; +} + +static int xsk_create_bpf_link(struct xsk_socket *xsk) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int link_fd; + int err; + + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); if (err) { - close(prog_fd); + pr_warn("getting XDP prog id failed\n"); return err; }
- ctx->prog_fd = prog_fd; + /* if there's a netlink-based XDP prog loaded on interface, bail out + * and ask user to do the removal by himself + */ + if (prog_id) { + pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); + return -EINVAL; + } + + opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); + + link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); + if (link_fd < 0) { + pr_warn("bpf_link_create failed: %s\n", strerror(errno)); + return link_fd; + } + + ctx->link_fd = link_fd; return 0; }
@@@ -655,6 -627,7 +657,6 @@@ static int xsk_lookup_bpf_maps(struct x close(fd); }
- err = 0; if (ctx->xsks_map_fd == -1) err = -ENOENT;
@@@ -671,98 -644,6 +673,98 @@@ static int xsk_set_bpf_maps(struct xsk_ &xsk->fd, 0); }
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) +{ + struct bpf_link_info link_info; + __u32 link_len; + __u32 id = 0; + int err; + int fd; + + while (true) { + err = bpf_link_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + pr_warn("can't get next link: %s\n", strerror(errno)); + break; + } + + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); + err = -errno; + break; + } + + link_len = sizeof(struct bpf_link_info); + memset(&link_info, 0, link_len); + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); + if (err) { + pr_warn("can't get link info: %s\n", strerror(errno)); + close(fd); + break; + } + if (link_info.type == BPF_LINK_TYPE_XDP) { + if (link_info.xdp.ifindex == ifindex) { + *link_fd = fd; + if (prog_id) + *prog_id = link_info.prog_id; + break; + } + } + close(fd); + } + + return err; +} + +static bool xsk_probe_bpf_link(void) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, + .flags = XDP_FLAGS_SKB_MODE); + struct bpf_load_program_attr prog_attr; + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), + BPF_EXIT_INSN() + }; + int prog_fd, link_fd = -1; + int ifindex_lo = 1; + bool ret = false; + int err; + + err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); + if (err) + return ret; + + if (link_fd >= 0) + return true; + + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + if (prog_fd < 0) + return ret; + + link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); + close(prog_fd); + + if (link_fd >= 0) { + ret = true; + close(link_fd); + } + + return ret; +} + static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) { char ifname[IFNAMSIZ]; @@@ -784,108 -665,64 +786,108 @@@ ctx->ifname[IFNAMSIZ - 1] = 0;
xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
return 0; }
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, - int *xsks_map_fd) +static int xsk_init_xdp_res(struct xsk_socket *xsk, + int *xsks_map_fd) { - struct xsk_socket *xsk = _xdp; struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; int err;
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, - xsk->config.xdp_flags); + err = xsk_create_bpf_maps(xsk); if (err) return err;
- if (!prog_id) { - err = xsk_create_bpf_maps(xsk); - if (err) - return err; + err = xsk_load_xdp_prog(xsk); + if (err) + goto err_load_xdp_prog;
- err = xsk_load_xdp_prog(xsk); - if (err) { - goto err_load_xdp_prog; - } - } else { - ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (ctx->prog_fd < 0) - return -errno; - err = xsk_lookup_bpf_maps(xsk); - if (err) { - close(ctx->prog_fd); - return err; - } - } + if (ctx->has_bpf_link) + err = xsk_create_bpf_link(xsk); + else + err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd, + xsk->config.xdp_flags);
- if (xsk->rx) { - err = xsk_set_bpf_maps(xsk); - if (err) { - if (!prog_id) { - goto err_set_bpf_maps; - } else { - close(ctx->prog_fd); - return err; - } - } - } - if (xsks_map_fd) - *xsks_map_fd = ctx->xsks_map_fd; + if (err) + goto err_attach_xdp_prog;
- return 0; + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_bpf_maps; + + return err;
err_set_bpf_maps: + if (ctx->has_bpf_link) + close(ctx->link_fd); + else + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); +err_attach_xdp_prog: close(ctx->prog_fd); - bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); err_load_xdp_prog: xsk_delete_bpf_maps(xsk); + return err; +} + +static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) +{ + struct xsk_ctx *ctx = xsk->ctx; + int err; + + ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (ctx->prog_fd < 0) { + err = -errno; + goto err_prog_fd; + } + err = xsk_lookup_bpf_maps(xsk); + if (err) + goto err_lookup_maps; + + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_maps; + + return err; + +err_set_maps: + close(ctx->xsks_map_fd); +err_lookup_maps: + close(ctx->prog_fd); +err_prog_fd: + if (ctx->has_bpf_link) + close(ctx->link_fd); + return err; +} + +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) +{ + struct xsk_socket *xsk = _xdp; + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int err; + + if (ctx->has_bpf_link) + err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); + else + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); + + if (err) + return err; + + err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) : + xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); + + if (!err && xsks_map_fd) + *xsks_map_fd = ctx->xsks_map_fd;
return err; } @@@ -908,26 -745,30 +910,30 @@@ static struct xsk_ctx *xsk_get_ctx(stru return NULL; }
- static void xsk_put_ctx(struct xsk_ctx *ctx) + static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) { struct xsk_umem *umem = ctx->umem; struct xdp_mmap_offsets off; int err;
- if (--ctx->refcount == 0) { - err = xsk_get_mmap_offsets(umem->fd, &off); - if (!err) { - munmap(ctx->fill->ring - off.fr.desc, - off.fr.desc + umem->config.fill_size * - sizeof(__u64)); - munmap(ctx->comp->ring - off.cr.desc, - off.cr.desc + umem->config.comp_size * - sizeof(__u64)); - } + if (--ctx->refcount) + return;
- list_del(&ctx->list); - free(ctx); - } + if (!unmap) + goto out_free; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (err) + goto out_free; + + munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * + sizeof(__u64)); + munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * + sizeof(__u64)); + + out_free: + list_del(&ctx->list); + free(ctx); }
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, @@@ -962,8 -803,6 +968,6 @@@ memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); ctx->ifname[IFNAMSIZ - 1] = '\0';
- umem->fill_save = NULL; - umem->comp_save = NULL; ctx->fill = fill; ctx->comp = comp; list_add(&ctx->list, &umem->ctx_list); @@@ -1019,6 -858,8 +1023,8 @@@ int xsk_socket__create_shared(struct xs struct xsk_socket *xsk; struct xsk_ctx *ctx; int err, ifindex; + bool unmap = umem->fill_save != fill; + bool rx_setup_done = false, tx_setup_done = false;
if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; @@@ -1046,6 -887,8 +1052,8 @@@ } } else { xsk->fd = umem->fd; + rx_setup_done = umem->rx_ring_setup_done; + tx_setup_done = umem->tx_ring_setup_done; }
ctx = xsk_get_ctx(umem, ifindex, queue_id); @@@ -1063,9 -906,8 +1071,9 @@@ } } xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
- if (rx) { + if (rx && !rx_setup_done) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, &xsk->config.rx_size, sizeof(xsk->config.rx_size)); @@@ -1073,8 -915,10 +1081,10 @@@ err = -errno; goto out_put_ctx; } + if (xsk->fd == umem->fd) + umem->rx_ring_setup_done = true; } - if (tx) { + if (tx && !tx_setup_done) { err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, &xsk->config.tx_size, sizeof(xsk->config.tx_size)); @@@ -1082,6 -926,8 +1092,8 @@@ err = -errno; goto out_put_ctx; } + if (xsk->fd == umem->fd) + umem->rx_ring_setup_done = true; }
err = xsk_get_mmap_offsets(xsk->fd, &off); @@@ -1160,6 -1006,8 +1172,8 @@@ }
*xsk_ptr = xsk; + umem->fill_save = NULL; + umem->comp_save = NULL; return 0;
out_mmap_tx: @@@ -1171,7 -1019,7 +1185,7 @@@ out_mmap_rx munmap(rx_map, off.rx.desc + xsk->config.rx_size * sizeof(struct xdp_desc)); out_put_ctx: - xsk_put_ctx(ctx); + xsk_put_ctx(ctx, unmap); out_socket: if (--umem->refcount) close(xsk->fd); @@@ -1185,6 -1033,9 +1199,9 @@@ int xsk_socket__create(struct xsk_socke struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *usr_config) { + if (!umem) + return -EFAULT; + return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, rx, tx, umem->fill_save, umem->comp_save, usr_config); @@@ -1220,8 -1071,6 +1237,8 @@@ void xsk_socket__delete(struct xsk_sock if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); + if (ctx->has_bpf_link) + close(ctx->link_fd); }
err = xsk_get_mmap_offsets(xsk->fd, &off); @@@ -1236,7 -1085,7 +1253,7 @@@ } }
- xsk_put_ctx(ctx); + xsk_put_ctx(ctx, true);
umem->refcount--; /* Do not close an fd that also has an associated umem connected