The following commit has been merged in the master branch:

commit bde8ef804c0dcf62b73cba01832e45f03f27694c
Merge: 47c4a12a09dd16af5c7453b7f2a145f5112b4bb2 cc7335786f7278d66bdcf96d3d411edfcb01be51
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Tue Feb 5 10:58:36 2019 +1100
Merge remote-tracking branch 'net-next/master'
diff --combined Documentation/networking/index.rst
index d6f9537afb27,9a32451cd201..4ef24c6a638a
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@@ -24,17 -24,16 +24,19 @@@ Contents
    device_drivers/intel/i40e
    device_drivers/intel/iavf
    device_drivers/intel/ice
+   devlink-info-versions
    kapi
    z8530book
    msg_zerocopy
    failover
    net_failover
+   phy
    alias
    bridge
    snmp_counter
+   checksum-offloads
+   segmentation-offloads
+   scaling
.. only:: subproject
diff --combined MAINTAINERS index 0605b0c10814,019a2bcfbd09..a35fc56b8121 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -365,7 -365,6 +365,7 @@@ M: Lorenzo Pieralisi <lorenzo.pieralisi M: Hanjun Guo hanjun.guo@linaro.org M: Sudeep Holla sudeep.holla@arm.com L: linux-acpi@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/acpi/arm64
@@@ -410,9 -409,10 +410,9 @@@ F: drivers/platform/x86/wmi. F: include/uapi/linux/wmi.h
AD1889 ALSA SOUND DRIVER
-M:	Thibaut Varene <T-Bone@parisc-linux.org>
-W:	http://wiki.parisc-linux.org/AD1889
+W:	https://parisc.wiki.kernel.org/index.php/AD1889
L:	linux-parisc@vger.kernel.org
-S:	Maintained
+S:	Orphan
F:	sound/pci/ad1889.*
AD525X ANALOG DEVICES DIGITAL POTENTIOMETERS DRIVER @@@ -1159,7 -1159,7 +1159,7 @@@ F: arch/arm*/include/asm/hw_breakpoint. F: arch/arm*/include/asm/perf_event.h F: drivers/perf/* F: include/linux/perf/arm_pmu.h -F: Documentation/devicetree/bindings/arm/pmu.txt +F: Documentation/devicetree/bindings/arm/pmu.yaml F: Documentation/devicetree/bindings/perf/
ARM PORT @@@ -1531,14 -1531,21 +1531,14 @@@ ARM/FREESCALE IMX / MXC ARM ARCHITECTUR M: Shawn Guo shawnguo@kernel.org M: Sascha Hauer s.hauer@pengutronix.de R: Pengutronix Kernel Team kernel@pengutronix.de -R: Fabio Estevam fabio.estevam@nxp.com +R: Fabio Estevam festevam@gmail.com R: NXP Linux Team linux-imx@nxp.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git -F: arch/arm/mach-imx/ -F: arch/arm/mach-mxs/ -F: arch/arm/boot/dts/imx* -F: arch/arm/configs/imx*_defconfig -F: arch/arm64/boot/dts/freescale/imx* -F: drivers/clk/imx/ -F: drivers/firmware/imx/ -F: drivers/soc/imx/ -F: include/linux/firmware/imx/ -F: include/soc/imx/ +N: imx +N: mxs +X: drivers/media/i2c/
ARM/FREESCALE VYBRID ARM ARCHITECTURE M: Shawn Guo shawnguo@kernel.org @@@ -1730,7 -1737,6 +1730,7 @@@ F: arch/arm/configs/mvebu_*_defconfi F: arch/arm/mach-mvebu/ F: arch/arm64/boot/dts/marvell/armada* F: drivers/cpufreq/armada-37xx-cpufreq.c +F: drivers/cpufreq/armada-8k-cpufreq.c F: drivers/cpufreq/mvebu-cpufreq.c F: drivers/irqchip/irq-armada-370-xp.c F: drivers/irqchip/irq-mvebu-* @@@ -1942,37 -1948,19 +1942,37 @@@ M: David Brown <david.brown@linaro.org L: linux-arm-msm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/soc/qcom/ +F: Documentation/devicetree/bindings/*/qcom* F: arch/arm/boot/dts/qcom-*.dts F: arch/arm/boot/dts/qcom-*.dtsi F: arch/arm/mach-qcom/ -F: arch/arm64/boot/dts/qcom/* +F: arch/arm64/boot/dts/qcom/ +F: drivers/*/qcom/ +F: drivers/*/qcom* +F: drivers/*/*/qcom/ +F: drivers/*/*/qcom* +F: drivers/*/pm8???-* +F: drivers/bluetooth/btqcomsmd.c +F: drivers/clocksource/timer-qcom.c +F: drivers/extcon/extcon-qcom* +F: drivers/iommu/msm* F: drivers/i2c/busses/i2c-qup.c -F: drivers/clk/qcom/ -F: drivers/dma/qcom/ -F: drivers/soc/qcom/ +F: drivers/i2c/busses/i2c-qcom-geni.c +F: drivers/mfd/ssbi.c +F: drivers/mmc/host/mmci_qcom* +F: drivers/mmc/host/sdhci_msm.c +F: drivers/pci/controller/dwc/pcie-qcom.c +F: drivers/phy/qualcomm/ +F: drivers/power/*/msm* +F: drivers/reset/reset-qcom-* +F: drivers/scsi/ufs/ufs-qcom.* F: drivers/spi/spi-qup.c +F: drivers/spi/spi-geni-qcom.c +F: drivers/spi/spi-qcom-qspi.c F: drivers/tty/serial/msm_serial.c -F: drivers/*/pm8???-* -F: drivers/mfd/ssbi.c -F: drivers/firmware/qcom_scm* +F: drivers/usb/dwc3/dwc3-qcom.c +F: include/dt-bindings/*/qcom* +F: include/linux/*/qcom* T: git git://git.kernel.org/pub/scm/linux/kernel/git/agross/linux.git
ARM/RADISYS ENP2611 MACHINE SUPPORT @@@ -2009,7 -1997,7 +2009,7 @@@ Q: http://patchwork.kernel.org/project/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next S: Supported F: arch/arm64/boot/dts/renesas/ -F: Documentation/devicetree/bindings/arm/shmobile.txt +F: Documentation/devicetree/bindings/arm/renesas.yaml F: drivers/soc/renesas/ F: include/linux/soc/renesas/
@@@ -2096,9 -2084,8 +2096,9 @@@ F: drivers/media/platform/s5p-cec F: Documentation/devicetree/bindings/media/s5p-cec.txt
ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT -M: Andrzej Pietrasiewicz andrzej.p@samsung.com +M: Andrzej Pietrasiewicz andrzejtp2010@gmail.com M: Jacek Anaszewski jacek.anaszewski@gmail.com +M: Sylwester Nawrocki s.nawrocki@samsung.com L: linux-arm-kernel@lists.infradead.org L: linux-media@vger.kernel.org S: Maintained @@@ -2122,8 -2109,6 +2122,8 @@@ Q: http://patchwork.kernel.org/project/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next S: Supported F: arch/arm/boot/dts/emev2* +F: arch/arm/boot/dts/gr-peach* +F: arch/arm/boot/dts/iwg20d-q7* F: arch/arm/boot/dts/r7s* F: arch/arm/boot/dts/r8a* F: arch/arm/boot/dts/r9a* @@@ -2131,7 -2116,7 +2131,7 @@@ F: arch/arm/boot/dts/sh F: arch/arm/configs/shmobile_defconfig F: arch/arm/include/debug/renesas-scif.S F: arch/arm/mach-shmobile/ -F: Documentation/devicetree/bindings/arm/shmobile.txt +F: Documentation/devicetree/bindings/arm/renesas.yaml F: drivers/soc/renesas/ F: include/linux/soc/renesas/
@@@ -2863,9 -2848,6 +2863,9 @@@ F: include/uapi/linux/if_bonding. BPF (Safe dynamic programs and tools) M: Alexei Starovoitov ast@kernel.org M: Daniel Borkmann daniel@iogearbox.net +R: Martin KaFai Lau kafai@fb.com +R: Song Liu songliubraving@fb.com +R: Yonghong Song yhs@fb.com L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git @@@ -2891,8 -2873,6 +2891,8 @@@ F: samples/bpf F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ +K: bpf +N: bpf
BPF JIT for ARM M: Shubham Bansal illusionist.neo@gmail.com @@@ -3976,7 -3956,7 +3976,7 @@@ M: Viresh Kumar <viresh.kumar@linaro.or L: linux-pm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git -T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) +T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git (For ARM Updates) B: https://bugzilla.kernel.org F: Documentation/admin-guide/pm/cpufreq.rst F: Documentation/admin-guide/pm/intel_pstate.rst @@@ -4036,7 -4016,6 +4036,7 @@@ S: Maintaine T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git B: https://bugzilla.kernel.org F: Documentation/admin-guide/pm/cpuidle.rst +F: Documentation/driver-api/pm/cpuidle.rst F: drivers/cpuidle/* F: include/linux/cpuidle.h
@@@ -4144,7 -4123,7 +4144,7 @@@ S: Maintaine F: drivers/media/dvb-frontends/cxd2820r*
CXGB3 ETHERNET DRIVER (CXGB3) - M: Arjun Vynipadath arjun@chelsio.com + M: Vishal Kulkarni vishal@chelsio.com L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@@ -4173,7 -4152,7 +4173,7 @@@ S: Supporte F: drivers/crypto/chelsio
CXGB4 ETHERNET DRIVER (CXGB4) - M: Arjun Vynipadath arjun@chelsio.com + M: Vishal Kulkarni vishal@chelsio.com L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@@ -5903,7 -5882,6 +5903,7 @@@ L: linux-fsdevel@vger.kernel.or S: Maintained F: fs/* F: include/linux/fs.h +F: include/linux/fs_types.h F: include/uapi/linux/fs.h
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER @@@ -6046,6 -6024,12 +6046,12 @@@ L: linuxppc-dev@lists.ozlabs.or S: Maintained F: drivers/dma/fsldma.*
+ FREESCALE ENETC ETHERNET DRIVERS + M: Claudiu Manoil claudiu.manoil@nxp.com + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/ethernet/freescale/enetc/ + FREESCALE eTSEC ETHERNET DRIVER (GIANFAR) M: Claudiu Manoil claudiu.manoil@nxp.com L: netdev@vger.kernel.org @@@ -6110,6 -6094,7 +6116,7 @@@ M: Yangbo Lu <yangbo.lu@nxp.com L: netdev@vger.kernel.org S: Maintained F: drivers/ptp/ptp_qoriq.c + F: drivers/ptp/ptp_qoriq_debugfs.c F: include/linux/fsl/ptp_qoriq.h F: Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@@ -6168,7 -6153,7 +6175,7 @@@ FREESCALE SOC SOUND DRIVER M: Timur Tabi timur@kernel.org M: Nicolin Chen nicoleotsuka@gmail.com M: Xiubo Li Xiubo.Lee@gmail.com -R: Fabio Estevam fabio.estevam@nxp.com +R: Fabio Estevam festevam@gmail.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linuxppc-dev@lists.ozlabs.org S: Maintained @@@ -7720,6 -7705,7 +7727,6 @@@ M: Yong Zhi <yong.zhi@intel.com M: Sakari Ailus sakari.ailus@linux.intel.com M: Bingbu Cao bingbu.cao@intel.com R: Tian Shu Qiu tian.shu.qiu@intel.com -R: Jian Xu Zheng jian.xu.zheng@intel.com L: linux-media@vger.kernel.org S: Maintained F: drivers/media/pci/intel/ipu3/ @@@ -10598,6 -10584,7 +10605,7 @@@ F: Documentation/devicetree/bindings/ne F: net/dsa/ F: include/net/dsa.h F: include/linux/dsa/ + F: include/linux/platform_data/dsa.h F: drivers/net/dsa/
NETWORKING [GENERAL] @@@ -10914,7 -10901,7 +10922,7 @@@ F: include/linux/nvmem-consumer. F: include/linux/nvmem-provider.h
NXP SGTL5000 DRIVER -M: Fabio Estevam fabio.estevam@nxp.com +M: Fabio Estevam festevam@gmail.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/sgtl5000.txt @@@ -11253,19 -11240,6 +11261,19 @@@ S: Maintaine F: drivers/media/i2c/ov7740.c F: Documentation/devicetree/bindings/media/i2c/ov7740.txt
+OMNIVISION OV9640 SENSOR DRIVER +M: Petr Cvek petrcvekcz@gmail.com +L: linux-media@vger.kernel.org +S: Maintained +F: drivers/media/i2c/ov9640.* + +OMNIVISION OV8856 SENSOR DRIVER +M: Ben Kao ben.kao@intel.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov8856.c + OMNIVISION OV9650 SENSOR DRIVER M: Sakari Ailus sakari.ailus@linux.intel.com R: Akinobu Mita akinobu.mita@gmail.com @@@ -11515,7 -11489,7 +11523,7 @@@ F: Documentation/blockdev/paride.tx F: drivers/block/paride/
PARISC ARCHITECTURE -M: "James E.J. Bottomley" jejb@parisc-linux.org +M: "James E.J. Bottomley" James.Bottomley@HansenPartnership.com M: Helge Deller deller@gmx.de L: linux-parisc@vger.kernel.org W: http://www.parisc-linux.org/ @@@ -12406,7 -12380,6 +12414,7 @@@ L: linux-media@vger.kernel.or T: git git://linuxtv.org/media_tree.git S: Odd Fixes F: drivers/media/usb/pwc/* +F: include/trace/events/pwc.h
PWM FAN DRIVER M: Kamil Debski kamil@wypas.org @@@ -12632,11 -12605,11 +12640,11 @@@ F: Documentation/media/v4l-drivers/qcom F: drivers/media/platform/qcom/camss/
QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096 -M: Ilia Lin ilia.lin@gmail.com -L: linux-pm@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt -F: drivers/cpufreq/qcom-cpufreq-kryo.c +M: Ilia Lin ilia.lin@kernel.org +L: linux-pm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt +F: drivers/cpufreq/qcom-cpufreq-kryo.c
QUALCOMM EMAC GIGABIT ETHERNET DRIVER M: Timur Tabi timur@kernel.org @@@ -12644,6 -12617,14 +12652,14 @@@ L: netdev@vger.kernel.or S: Maintained F: drivers/net/ethernet/qualcomm/emac/
+ QUALCOMM ETHQOS ETHERNET DRIVER + M: Vinod Koul vkoul@kernel.org + M: Niklas Cassel niklas.cassel@linaro.org + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c + F: Documentation/devicetree/bindings/net/qcom,dwmac.txt + QUALCOMM GENERIC INTERFACE I2C DRIVER M: Alok Chauhan alokc@codeaurora.org M: Karthikeyan Ramasubramanian kramasub@codeaurora.org @@@ -12903,13 -12884,6 +12919,13 @@@ F: Documentation/devicetree/bindings/ne F: drivers/net/dsa/realtek-smi* F: drivers/net/dsa/rtl83*
+REDPINE WIRELESS DRIVER +M: Amitkumar Karwar amitkarwar@gmail.com +M: Siva Rebbagondla siva8118@gmail.com +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/net/wireless/rsi/ + REGISTER MAP ABSTRACTION M: Mark Brown broonie@kernel.org L: linux-kernel@vger.kernel.org @@@ -13000,7 -12974,6 +13016,7 @@@ F: drivers/reset F: Documentation/devicetree/bindings/reset/ F: include/dt-bindings/reset/ F: include/linux/reset.h +F: include/linux/reset/ F: include/linux/reset-controller.h
RESTARTABLE SEQUENCES SUPPORT @@@ -16684,15 -16657,6 +16700,15 @@@ S: Maintaine F: drivers/platform/x86/ F: drivers/platform/olpc/
+X86 PLATFORM DRIVERS - ARCH +R: Darren Hart dvhart@infradead.org +R: Andy Shevchenko andy@infradead.org +L: platform-driver-x86@vger.kernel.org +L: x86@kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core +S: Maintained +F: arch/x86/platform + X86 VDSO M: Andy Lutomirski luto@kernel.org L: linux-kernel@vger.kernel.org diff --combined arch/arm/boot/dts/ls1021a.dtsi index 114e41e290e9,ad75959b99c1..b4f2723ecd86 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@@ -45,12 -45,11 +45,12 @@@ * OTHER DEALINGS IN THE SOFTWARE. */
-#include "skeleton64.dtsi"
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/thermal/thermal.h>

 / {
+	#address-cells = <2>;
+	#size-cells = <2>;
 	compatible = "fsl,ls1021a";
 	interrupt-parent = <&gic>;
@@@ -89,11 -88,6 +89,11 @@@ }; };
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>;
+	};
+
 	sysclk: sysclk {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
@@@ -131,13 -125,6 +131,13 @@@
 		interrupt-parent = <&gic>;
 		ranges;

+		ddr: memory-controller@1080000 {
+			compatible = "fsl,qoriq-memory-controller";
+			reg = <0x0 0x1080000 0x0 0x1000>;
+			interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>;
+			big-endian;
+		};
+
 		gic: interrupt-controller@1400000 {
 			compatible = "arm,gic-400", "arm,cortex-a7-gic";
 			#interrupt-cells = <3>;
@@@ -719,6 -706,7 +719,7 @@@
 			fsl,tmr-fiper1 = <999999995>;
 			fsl,tmr-fiper2 = <99990>;
 			fsl,max-adj = <499999999>;
+			fsl,extts-fifo;
 		};
enet0: ethernet@2d10000 { @@@ -824,7 -812,6 +825,7 @@@ dr_mode = "host"; snps,quirk-frame-length-adjustment = <0x20>; snps,dis_rxdet_inp3_quirk; + snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>; };
pcie@3400000 { @@@ -838,7 -825,6 +839,7 @@@ #size-cells = <2>; device_type = "pci"; num-lanes = <4>; + num-viewport = <6>; bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ @@@ -863,7 -849,6 +864,7 @@@ #size-cells = <2>; device_type = "pci"; num-lanes = <4>; + num-viewport = <6>; bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000 /* downstream I/O */ 0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */ diff --combined drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index c500ea77aaa0,04925c731f0b..fd516e6aa395 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@@ -1243,34 -1243,36 +1243,36 @@@ enable_err return err; }
- /* The DPIO store must be empty when we call this,
-  * at the end of every NAPI cycle.
-  */
- static u32 drain_channel(struct dpaa2_eth_channel *ch)
+ /* Total number of in-flight frames on ingress queues */
+ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
  {
-	u32 drained = 0, total = 0;
+	struct dpaa2_eth_fq *fq;
+	u32 fcnt = 0, bcnt = 0, total = 0;
+	int i, err;

-	do {
-		pull_channel(ch);
-		drained = consume_frames(ch, NULL);
-		total += drained;
-	} while (drained);
+	for (i = 0; i < priv->num_fqs; i++) {
+		fq = &priv->fq[i];
+		err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
+		if (err) {
+			netdev_warn(priv->net_dev, "query_fq_count failed");
+			break;
+		}
+		total += fcnt;
+	}

 	return total;
 }

- static u32 drain_ingress_frames(struct dpaa2_eth_priv *priv)
+ static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
  {
-	struct dpaa2_eth_channel *ch;
-	int i;
-	u32 drained = 0;
-
-	for (i = 0; i < priv->num_channels; i++) {
-		ch = priv->channel[i];
-		drained += drain_channel(ch);
-	}
+	int retries = 10;
+	u32 pending;

-	return drained;
+	do {
+		pending = ingress_fq_count(priv);
+		if (pending)
+			msleep(100);
+	} while (pending && --retries);
 }
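For readers unfamiliar with the pattern: the new wait_for_fq_empty() replaces an open-ended drain loop with a bounded poll, query the in-flight count, sleep, and give up after a fixed number of retries. A minimal standalone sketch of the same idea in plain C (query_pending_frames() is a stub standing in for dpaa2_io_query_fq_count(), not driver code):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Stub standing in for a hardware queue-occupancy query. */
static unsigned int query_pending_frames(void)
{
	static unsigned int pending = 300;

	pending = pending > 100 ? pending - 100 : 0; /* pretend HW drains */
	return pending;
}

/* Poll until the queues are empty, but never wait forever:
 * at most retries * 100 ms, mirroring wait_for_fq_empty() above.
 */
static bool wait_for_empty(int retries)
{
	unsigned int pending;

	do {
		pending = query_pending_frames();
		if (pending)
			usleep(100 * 1000); /* 100 ms, like msleep(100) */
	} while (pending && --retries);

	return pending == 0;
}

int main(void)
{
	printf("drained: %s\n", wait_for_empty(10) ? "yes" : "timed out");
	return 0;
}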
static int dpaa2_eth_stop(struct net_device *net_dev) @@@ -1278,14 -1280,22 +1280,22 @@@ struct dpaa2_eth_priv *priv = netdev_priv(net_dev); int dpni_enabled = 0; int retries = 10; - u32 drained;
netif_tx_stop_all_queues(net_dev); netif_carrier_off(net_dev);
-	/* Loop while dpni_disable() attempts to drain the egress FQs
-	 * and confirm them back to us.
+	/* On dpni_disable(), the MC firmware will:
+	 * - stop MAC Rx and wait for all Rx frames to be enqueued to software
+	 * - cut off WRIOP dequeues from egress FQs and wait until transmission
+	 * of all in flight Tx frames is finished (and corresponding Tx conf
+	 * frames are enqueued back to software)
+	 *
+	 * Before calling dpni_disable(), we wait for all Tx frames to arrive
+	 * on WRIOP. After it finishes, wait until all remaining frames on Rx
+	 * and Tx conf queues are consumed on NAPI poll.
 	 */
+	msleep(500);
+
 	do {
 		dpni_disable(priv->mc_io, 0, priv->mc_token);
 		dpni_is_enabled(priv->mc_io, 0, priv->mc_token, &dpni_enabled);
@@@ -1300,19 -1310,9 +1310,9 @@@
 		 */
 	}
- /* Wait for NAPI to complete on every core and disable it. - * In particular, this will also prevent NAPI from being rescheduled if - * a new CDAN is serviced, effectively discarding the CDAN. We therefore - * don't even need to disarm the channels, except perhaps for the case - * of a huge coalescing value. - */ + wait_for_fq_empty(priv); disable_ch_napi(priv);
- /* Manually drain the Rx and TxConf queues */ - drained = drain_ingress_frames(priv); - if (drained) - netdev_dbg(net_dev, "Drained %d frames.\n", drained); - /* Empty the buffer pool */ drain_pool(priv);
@@@ -1902,7 -1902,7 +1902,7 @@@ static int setup_dpio(struct dpaa2_eth_
/* Register the new context */ channel->dpio = dpaa2_io_service_select(i); - err = dpaa2_io_service_register(channel->dpio, nctx); + err = dpaa2_io_service_register(channel->dpio, nctx, dev); if (err) { dev_dbg(dev, "No affine DPIO for cpu %d\n", i); /* If no affine DPIO for this core, there's probably @@@ -1942,7 -1942,7 +1942,7 @@@ return 0;
err_set_cdan: - dpaa2_io_service_deregister(channel->dpio, nctx); + dpaa2_io_service_deregister(channel->dpio, nctx, dev); err_service_reg: free_channel(priv, channel); err_alloc_ch: @@@ -1962,14 -1962,13 +1962,14 @@@
static void free_dpio(struct dpaa2_eth_priv *priv) { - int i; + struct device *dev = priv->net_dev->dev.parent; struct dpaa2_eth_channel *ch; + int i;
/* deregister CDAN notifications and free channels */ for (i = 0; i < priv->num_channels; i++) { ch = priv->channel[i]; - dpaa2_io_service_deregister(ch->dpio, &ch->nctx); + dpaa2_io_service_deregister(ch->dpio, &ch->nctx, dev); free_channel(priv, ch); } } @@@ -3084,6 -3083,10 +3084,10 @@@ static int dpaa2_eth_probe(struct fsl_m goto err_netdev_reg; }
+ #ifdef CONFIG_DEBUG_FS + dpaa2_dbg_add(priv); + #endif + dev_info(dev, "Probed interface %s\n", net_dev->name); return 0;
@@@ -3127,6 -3130,9 +3131,9 @@@ static int dpaa2_eth_remove(struct fsl_ net_dev = dev_get_drvdata(dev); priv = netdev_priv(net_dev);
+ #ifdef CONFIG_DEBUG_FS + dpaa2_dbg_remove(priv); + #endif unregister_netdev(net_dev);
if (priv->do_link_poll) @@@ -3171,4 -3177,25 +3178,25 @@@ static struct fsl_mc_driver dpaa2_eth_d .match_id_table = dpaa2_eth_match_id_table };
- module_fsl_mc_driver(dpaa2_eth_driver); + static int __init dpaa2_eth_driver_init(void) + { + int err; + + dpaa2_eth_dbg_init(); + err = fsl_mc_driver_register(&dpaa2_eth_driver); + if (err) { + dpaa2_eth_dbg_exit(); + return err; + } + + return 0; + } + + static void __exit dpaa2_eth_driver_exit(void) + { + dpaa2_eth_dbg_exit(); + fsl_mc_driver_unregister(&dpaa2_eth_driver); + } + + module_init(dpaa2_eth_driver_init); + module_exit(dpaa2_eth_driver_exit); diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 685d20472358,517555bcfa44..ced37ce50c90 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@@ -534,7 -534,7 +534,7 @@@ static void stmmac_get_rx_hwtstamp(stru }
/** - * stmmac_hwtstamp_ioctl - control hardware timestamping. + * stmmac_hwtstamp_set - control hardware timestamping. * @dev: device pointer. * @ifr: An IOCTL specific structure, that can contain a pointer to * a proprietary structure used to pass information to the driver. @@@ -544,7 -544,7 +544,7 @@@ * Return Value: * 0 on success and an appropriate -ve integer on failure. */ - static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) + static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct stmmac_priv *priv = netdev_priv(dev); struct hwtstamp_config config; @@@ -573,7 -573,7 +573,7 @@@ }
 	if (copy_from_user(&config, ifr->ifr_data,
-			   sizeof(struct hwtstamp_config)))
+			   sizeof(config)))
 		return -EFAULT;
netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n", @@@ -765,8 -765,31 +765,31 @@@ (u32)now.tv_sec, now.tv_nsec); }
+	memcpy(&priv->tstamp_config, &config, sizeof(config));
+
 	return copy_to_user(ifr->ifr_data, &config,
-			    sizeof(struct hwtstamp_config)) ? -EFAULT : 0;
+			    sizeof(config)) ? -EFAULT : 0;
+}
+
+/**
+ * stmmac_hwtstamp_get - read hardware timestamping.
+ * @dev: device pointer.
+ * @ifr: An IOCTL specific structure, that can contain a pointer to
+ *	a proprietary structure used to pass information to the driver.
+ * Description:
+ *	This function obtains the current hardware timestamping settings
+ *	as requested.
+ */
+static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	struct hwtstamp_config *config = &priv->tstamp_config;
+
+	if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, config,
+			    sizeof(*config)) ? -EFAULT : 0;
 }
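With the SIOCGHWTSTAMP path in place, userspace can read back the active timestamping configuration instead of blindly re-issuing SIOCSHWTSTAMP. A minimal sketch of the caller side, assuming an example interface name of "eth0":

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <unistd.h>

int main(void)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	memset(&cfg, 0, sizeof(cfg));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1); /* example interface */
	ifr.ifr_data = (void *)&cfg;

	/* Read the current hardware timestamping settings. */
	if (ioctl(fd, SIOCGHWTSTAMP, &ifr) < 0)
		perror("SIOCGHWTSTAMP");
	else
		printf("tx_type=%d rx_filter=%d\n", cfg.tx_type, cfg.rx_filter);

	close(fd);
	return 0;
}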
/** @@@ -3023,22 -3046,10 +3046,22 @@@ static netdev_tx_t stmmac_xmit(struct s
tx_q = &priv->tx_queue[queue];
+ if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } }
if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@@ -3053,6 -3064,9 +3076,6 @@@ return NETDEV_TX_BUSY; }
- if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); @@@ -3776,7 -3790,10 +3799,10 @@@ static int stmmac_ioctl(struct net_devi ret = phy_mii_ioctl(dev->phydev, rq, cmd); break; case SIOCSHWTSTAMP: - ret = stmmac_hwtstamp_ioctl(dev, rq); + ret = stmmac_hwtstamp_set(dev, rq); + break; + case SIOCGHWTSTAMP: + ret = stmmac_hwtstamp_get(dev, rq); break; default: break; diff --combined drivers/net/phy/dp83640.c index 6e8807212aa3,25ef483bcc24..2fe2ebaf62d1 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@@ -1,21 -1,8 +1,8 @@@ + // SPDX-License-Identifier: GPL-2.0+ /* * Driver for the National Semiconductor DP83640 PHYTER * * Copyright (C) 2010 OMICRON electronics GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@@ -898,14 -885,14 +885,14 @@@ static void decode_txts(struct dp83640_ struct phy_txts *phy_txts) { struct skb_shared_hwtstamps shhwtstamps; + struct dp83640_skb_info *skb_info; struct sk_buff *skb; - u64 ns; u8 overflow; + u64 ns;
 	/* We must already have the skb that triggered this. */
-
+again:
 	skb = skb_dequeue(&dp83640->tx_queue);
-
 	if (!skb) {
 		pr_debug("have timestamp but tx_queue empty\n");
 		return;
@@@ -920,11 -907,6 +907,11 @@@
 		}
 		return;
 	}
+	skb_info = (struct dp83640_skb_info *)skb->cb;
+	if (time_after(jiffies, skb_info->tmo)) {
+		kfree_skb(skb);
+		goto again;
+	}

 	ns = phy2txts(phy_txts);
 	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
@@@ -1477,7 -1459,6 +1464,7 @@@ static bool dp83640_rxtstamp(struct phy
 static void dp83640_txtstamp(struct phy_device *phydev,
 			     struct sk_buff *skb, int type)
 {
+	struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb;
 	struct dp83640_private *dp83640 = phydev->priv;

 	switch (dp83640->hwts_tx_en) {
@@@ -1490,7 -1471,6 +1477,7 @@@
 		/* fall through */
 	case HWTSTAMP_TX_ON:
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
 		skb_queue_tail(&dp83640->tx_queue, skb);
 		break;
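The dp83640 change above stamps every timestamp-pending skb with a deadline (jiffies + SKB_TIMESTAMP_TIMEOUT) and lets decode_txts() discard expired entries, so a lost hardware timestamp can no longer wedge the queue or match a stale packet. A userspace analogue of the deadline check using a monotonic clock, all names invented for illustration:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct queued_pkt {
	struct timespec deadline;	/* drop if not matched by then */
	int id;
};

static struct timespec deadline_in_ms(long ms)
{
	struct timespec t;

	clock_gettime(CLOCK_MONOTONIC, &t);
	t.tv_sec += ms / 1000;
	t.tv_nsec += (ms % 1000) * 1000000L;
	if (t.tv_nsec >= 1000000000L) {
		t.tv_sec++;
		t.tv_nsec -= 1000000000L;
	}
	return t;
}

/* Equivalent of time_after(jiffies, skb_info->tmo). */
static bool expired(const struct timespec *deadline)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return now.tv_sec > deadline->tv_sec ||
	       (now.tv_sec == deadline->tv_sec &&
		now.tv_nsec > deadline->tv_nsec);
}

int main(void)
{
	struct queued_pkt pkt = { .deadline = deadline_in_ms(0), .id = 1 };

	/* A zero-ms deadline is effectively already stale: prints "drop". */
	printf("%s pkt %d\n", expired(&pkt.deadline) ? "drop" : "keep", pkt.id);
	return 0;
}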
diff --combined drivers/net/phy/marvell.c index abb7876a8776,90f44ba8aca7..3ccba37bd6dd --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@@ -1,3 -1,4 +1,4 @@@ + // SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/marvell.c * @@@ -8,12 -9,6 +9,6 @@@ * Copyright (c) 2004 Freescale Semiconductor, Inc. * * Copyright (c) 2013 Michael Stapelberg michael@stapelberg.de - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include <linux/kernel.h> #include <linux/string.h> @@@ -847,6 -842,7 +842,6 @@@ static int m88e1510_config_init(struct
/* SGMII-to-Copper mode initialization */ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { - /* Select page 18 */ err = marvell_set_page(phydev, 18); if (err < 0) @@@ -869,6 -865,21 +864,6 @@@ err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) return err; - - /* There appears to be a bug in the 88e1512 when used in - * SGMII to copper mode, where the AN advertisement register - * clears the pause bits each time a negotiation occurs. - * This means we can never be truely sure what was advertised, - * so disable Pause support. - */ - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->advertising); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->advertising); }
return m88e1318_config_init(phydev); diff --combined drivers/net/virtio_net.c index 4cfceb789eea,2a0edd4653e3..7eb38ea9ba56 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@@ -57,8 -57,6 +57,8 @@@ module_param(napi_tx, bool, 0644) #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1)
+#define VIRTIO_XDP_FLAG	BIT(0)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
+static bool is_xdp_frame(void *ptr)
+{
+	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
+}
+
+static void *xdp_to_ptr(struct xdp_frame *ptr)
+{
+	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
+}
+
+static struct xdp_frame *ptr_to_xdp(void *ptr)
+{
+	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
+}
+
 /* Converting between virtqueue no. and kernel tx/rx queue no.
  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  */
@@@ -479,8 -462,7 +479,8 @@@ static int __virtnet_xdp_xmit_one(struc
sg_init_one(sq->sg, xdpf->data, xdpf->len);
- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */
@@@ -500,47 -482,36 +500,47 @@@ static int virtnet_xdp_xmit(struct net_ { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; - struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; + int packets = 0; + int bytes = 0; int drops = 0; int kicks = 0; int ret, err; + void *ptr; int i;
- sq = virtnet_xdp_sq(vi); - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - drops = n; - goto out; - } - /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_dereference(rq->xdp_prog); - if (!xdp_prog) { - ret = -ENXIO; + if (!xdp_prog) + return -ENXIO; + + sq = virtnet_xdp_sq(vi); + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { + ret = -EINVAL; drops = n; goto out; }
/* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(is_xdp_frame(ptr))) { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += frame->len; + xdp_return_frame(frame); + } else { + struct sk_buff *skb = ptr; + + bytes += skb->len; + napi_consume_skb(skb, false); + } + packets++; + }
for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; @@@ -559,8 -530,6 +559,8 @@@ } out: u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; sq->stats.xdp_tx += n; sq->stats.xdp_tx_drops += drops; sq->stats.kicks += kicks; @@@ -1066,6 -1035,7 +1066,7 @@@ static void receive_buf(struct virtnet_ goto frame_err; }
+ skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); @@@ -1363,26 -1333,18 +1364,26 @@@ static int virtnet_receive(struct recei
 static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
 {
-	struct sk_buff *skb;
 	unsigned int len;
 	unsigned int packets = 0;
 	unsigned int bytes = 0;
+	void *ptr;

-	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		pr_debug("Sent skb %p\n", skb);
+	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		if (likely(!is_xdp_frame(ptr))) {
+			struct sk_buff *skb = ptr;

-		bytes += skb->len;
-		packets++;
+			pr_debug("Sent skb %p\n", skb);
+
+			bytes += skb->len;
+			napi_consume_skb(skb, in_napi);
+		} else {
+			struct xdp_frame *frame = ptr_to_xdp(ptr);

-		napi_consume_skb(skb, in_napi);
+			bytes += frame->len;
+			xdp_return_frame(frame);
+		}
+		packets++;
 	}

 	/* Avoid overhead when no packets have been processed
@@@ -1397,16 -1359,6 +1398,16 @@@
 	u64_stats_update_end(&sq->stats.syncp);
 }
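Both free_old_xmit_skbs() and virtnet_xdp_xmit() now rely on the VIRTIO_XDP_FLAG trick: sk_buffs and xdp_frames share the same virtqueue slots and are told apart by a flag smuggled into the pointer's least significant bit, which is always zero for suitably aligned allocations. A self-contained demonstration of the tagging itself, with simplified types:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define XDP_FLAG 0x1UL	/* low bit marks "this is an XDP frame" */

static int is_xdp(void *ptr)
{
	return (uintptr_t)ptr & XDP_FLAG;
}

static void *tag_xdp(void *frame)
{
	/* malloc() results are at least 8-byte aligned, so bit 0 is free */
	assert(((uintptr_t)frame & XDP_FLAG) == 0);
	return (void *)((uintptr_t)frame | XDP_FLAG);
}

static void *untag(void *ptr)
{
	return (void *)((uintptr_t)ptr & ~XDP_FLAG);
}

int main(void)
{
	long *frame = malloc(sizeof(*frame));
	void *slot = tag_xdp(frame);	/* what goes into the ring */

	printf("tagged slot is %s\n", is_xdp(slot) ? "xdp" : "skb");
	assert(untag(slot) == frame);	/* recover the real pointer */
	free(frame);
	return 0;
}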
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+		return false;
+	else if (q < vi->curr_queue_pairs)
+		return true;
+	else
+		return false;
+}
+
 static void virtnet_poll_cleantx(struct receive_queue *rq)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;
@@@ -1414,7 -1366,7 +1415,7 @@@
 	struct send_queue *sq = &vi->sq[index];
 	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
- if (!sq->napi.weight) + if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return;
if (__netif_tx_trylock(txq)) { @@@ -1491,16 -1443,8 +1492,16 @@@ static int virtnet_poll_tx(struct napi_ { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; - struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); + unsigned int index = vq2txq(sq->vq); + struct netdev_queue *txq;
+ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { + /* We don't need to enable cb for XDP */ + napi_complete_done(napi, 0); + return 0; + } + + txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); @@@ -2452,10 -2396,6 +2453,10 @@@ static int virtnet_xdp_set(struct net_d return -ENOMEM; }
+ old_prog = rtnl_dereference(vi->rq[0].xdp_prog); + if (!prog && !old_prog) + return 0; + if (prog) { prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); if (IS_ERR(prog)) @@@ -2463,62 -2403,36 +2464,62 @@@ }
/* Make sure NAPI is not using any XDP TX queues for RX. */ - if (netif_running(dev)) - for (i = 0; i < vi->max_queue_pairs; i++) + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + } + + if (!prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0) + virtnet_restore_guest_offloads(vi); + } + synchronize_net(); + }
- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp;
- for (i = 0; i < vi->max_queue_pairs; i++) { - old_prog = rtnl_dereference(vi->rq[i].xdp_prog); - rcu_assign_pointer(vi->rq[i].xdp_prog, prog); - if (i == 0) { - if (!old_prog) + if (prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); - if (!prog) - virtnet_restore_guest_offloads(vi); } + } + + for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) + if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } }
return 0;
err: - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (!prog) { + virtnet_clear_guest_offloads(vi); + for (i = 0; i < vi->max_queue_pairs; i++) + rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); + } + + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } + } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; @@@ -2700,6 -2614,16 +2701,6 @@@ static void free_receive_page_frags(str put_page(vi->rq[i].alloc_frag.page); }
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@@ -2708,10 -2632,10 +2709,10 @@@ for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_raw_buffer_queue(vi, i)) + if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else - put_page(virt_to_head_page(buf)); + xdp_return_frame(ptr_to_xdp(buf)); } }
diff --combined drivers/s390/net/qeth_core.h index 122059ecad84,d65650ef6b41..71d27a804920 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@@ -22,7 -22,6 +22,7 @@@ #include <linux/hashtable.h> #include <linux/ip.h> #include <linux/refcount.h> +#include <linux/workqueue.h>
#include <net/ipv6.h> #include <net/if_inet6.h> @@@ -742,11 -741,6 +742,6 @@@ struct qeth_discipline struct qeth_ipa_cmd *cmd); };
- struct qeth_vlan_vid { - struct list_head list; - unsigned short vid; - }; - enum qeth_addr_disposition { QETH_DISP_ADDR_DELETE = 0, QETH_DISP_ADDR_DO_NOTHING = 1, @@@ -790,12 -784,9 +785,10 @@@ struct qeth_card struct qeth_seqno seqno; struct qeth_card_options options;
+ struct workqueue_struct *event_wq; wait_queue_head_t wait_q; spinlock_t mclock; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mutex vid_list_mutex; /* vid_list */ - struct list_head vid_list; DECLARE_HASHTABLE(mac_htable, 4); DECLARE_HASHTABLE(ip_htable, 4); DECLARE_HASHTABLE(ip_mc_htable, 4); @@@ -804,7 -795,6 +797,6 @@@ unsigned long thread_start_mask; unsigned long thread_allowed_mask; unsigned long thread_running_mask; - struct task_struct *recovery_task; spinlock_t ip_lock; struct qeth_ipato ipato; struct list_head cmd_waiter_list; @@@ -964,6 -954,7 +956,6 @@@ extern const struct attribute_group *qe extern const struct attribute_group qeth_device_attr_group; extern const struct attribute_group qeth_device_blkt_group; extern const struct device_type qeth_generic_devtype; -extern struct workqueue_struct *qeth_wq;
int qeth_card_hw_is_reachable(struct qeth_card *); const char *qeth_get_cardname_short(struct qeth_card *); @@@ -977,11 -968,8 +969,8 @@@ extern struct qeth_dbf_info qeth_dbf[QE
struct net_device *qeth_clone_netdev(struct net_device *orig); struct qeth_card *qeth_get_card_by_busid(char *bus_id); - void qeth_set_recovery_task(struct qeth_card *); - void qeth_clear_recovery_task(struct qeth_card *); void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); int qeth_threads_running(struct qeth_card *, unsigned long); - int qeth_wait_for_threads(struct qeth_card *, unsigned long); int qeth_do_run_thread(struct qeth_card *, unsigned long); void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long); void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long); @@@ -1048,6 -1036,9 +1037,9 @@@ netdev_features_t qeth_fix_features(str netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); + int qeth_open(struct net_device *dev); + int qeth_stop(struct net_device *dev); + int qeth_vm_request_mac(struct qeth_card *card); int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, diff --combined drivers/s390/net/qeth_core_main.c index 89f912213e62,dcc06e48b70b..6ef0c89370b5 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -74,7 -74,8 +74,7 @@@ static void qeth_notify_skbs(struct qet static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int);
-struct workqueue_struct *qeth_wq;
-EXPORT_SYMBOL_GPL(qeth_wq);
+static struct workqueue_struct *qeth_wq;
int qeth_card_hw_is_reachable(struct qeth_card *card) { @@@ -192,23 -193,6 +192,6 @@@ const char *qeth_get_cardname_short(str return "n/a"; }
- void qeth_set_recovery_task(struct qeth_card *card) - { - card->recovery_task = current; - } - EXPORT_SYMBOL_GPL(qeth_set_recovery_task); - - void qeth_clear_recovery_task(struct qeth_card *card) - { - card->recovery_task = NULL; - } - EXPORT_SYMBOL_GPL(qeth_clear_recovery_task); - - static bool qeth_is_recovery_task(const struct qeth_card *card) - { - return card->recovery_task == current; - } - void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask) { @@@ -235,15 -219,6 +218,6 @@@ int qeth_threads_running(struct qeth_ca } EXPORT_SYMBOL_GPL(qeth_threads_running);
- int qeth_wait_for_threads(struct qeth_card *card, unsigned long threads) - { - if (qeth_is_recovery_task(card)) - return 0; - return wait_event_interruptible(card->wait_q, - qeth_threads_running(card, threads) == 0); - } - EXPORT_SYMBOL_GPL(qeth_wait_for_threads); - void qeth_clear_working_pool_list(struct qeth_card *card) { struct qeth_buffer_pool_entry *pool_entry, *tmp; @@@ -565,7 -540,6 +539,7 @@@ static int __qeth_issue_next_read(struc QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); wake_up(&card->wait_q); @@@ -1127,8 -1101,6 +1101,8 @@@ static void qeth_irq(struct ccw_device rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + if (iob) + qeth_release_buffer(iob->channel, iob); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@@ -1432,7 -1404,6 +1406,6 @@@ static void qeth_setup_card(struct qeth spin_lock_init(&card->thread_mask_lock); mutex_init(&card->conf_mutex); mutex_init(&card->discipline_mutex); - mutex_init(&card->vid_list_mutex); INIT_WORK(&card->kernel_thread_starter, qeth_start_kernel_thread); INIT_LIST_HEAD(&card->cmd_waiter_list); init_waitqueue_head(&card->wait_q); @@@ -1468,10 -1439,6 +1441,10 @@@ static struct qeth_card *qeth_alloc_car CARD_RDEV(card) = gdev->cdev[0]; CARD_WDEV(card) = gdev->cdev[1]; CARD_DDEV(card) = gdev->cdev[2]; + + card->event_wq = alloc_ordered_workqueue("%s", 0, dev_name(&gdev->dev)); + if (!card->event_wq) + goto out_wq; if (qeth_setup_channel(&card->read, true)) goto out_ip; if (qeth_setup_channel(&card->write, true)) @@@ -1487,8 -1454,6 +1460,8 @@@ out_data out_channel: qeth_clean_channel(&card->read); out_ip: + destroy_workqueue(card->event_wq); +out_wq: dev_set_drvdata(&gdev->dev, NULL); kfree(card); out: @@@ -1817,7 -1782,6 +1790,7 @@@ static int qeth_idx_activate_get_answer QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc); QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@@ -1887,7 -1851,6 +1860,7 @@@ static int qeth_idx_activate_channel(st rc); QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@@ -2068,7 -2031,6 +2041,7 @@@ int qeth_send_control_data(struct qeth_ } reply = qeth_alloc_reply(card); if (!reply) { + qeth_release_buffer(channel, iob); return -ENOMEM; } reply->callback = reply_cb; @@@ -2400,12 -2362,11 +2373,12 @@@ static int qeth_init_qdio_out_buf(struc return 0; }
-static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q)
+static void qeth_free_output_queue(struct qeth_qdio_out_q *q)
 {
 	if (!q)
 		return;

+	qeth_clear_outq_buffers(q, 1);
 	qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
 	kfree(q);
 }
- netif_wake_queue(card->dev); - if (card->options.performance_stats) { int delta_t = qeth_get_micros(); delta_t -= card->perf_stats.cq_start_time; @@@ -3936,7 -3899,6 +3907,6 @@@ static int qeth_fill_buffer(struct qeth { struct qdio_buffer *buffer = buf->buffer; bool is_first_elem = true; - int flush_cnt = 0;
__skb_queue_tail(&buf->skb_list, skb);
@@@ -3957,24 -3919,22 +3927,22 @@@
 	if (!queue->do_pack) {
 		QETH_CARD_TEXT(queue->card, 6, "fillbfnp");
-		/* set state to PRIMED -> will be flushed */
-		atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED);
-		flush_cnt = 1;
 	} else {
 		QETH_CARD_TEXT(queue->card, 6, "fillbfpa");
 		if (queue->card->options.performance_stats)
 			queue->card->perf_stats.skbs_sent_pack++;
-		if (buf->next_element_to_fill >=
-		    QETH_MAX_BUFFER_ELEMENTS(queue->card)) {
-			/*
-			 * packed buffer if full -> set state PRIMED
-			 * -> will be flushed
-			 */
-			atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED);
-			flush_cnt = 1;
-		}
+
+		/* If the buffer still has free elements, keep using it. */
+		if (buf->next_element_to_fill <
+		    QETH_MAX_BUFFER_ELEMENTS(queue->card))
+			return 0;
 	}
-	return flush_cnt;
+
+	/* flush out the buffer */
+	atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED);
+	queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
+				  QDIO_MAX_BUFFERS_PER_Q;
+	return 1;
 }
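The reworked qeth_fill_buffer() now owns the ring-index advance: in non-packing mode every buffer is flushed immediately, while in packing mode a buffer is only primed once it has no free elements left. A toy model of that pack-or-flush decision, with all names and sizes invented for illustration:

#include <stdio.h>

#define RING_SIZE	128
#define ELEMS_PER_BUF	16

struct out_queue {
	int do_pack;		/* batch several packets per buffer? */
	int next_buf_to_fill;	/* ring index, advances on flush */
	int elems_used;		/* fill level of the current buffer */
};

/* Returns 1 if the caller must flush the just-completed buffer. */
static int fill_buffer(struct out_queue *q)
{
	q->elems_used++;

	/* Packing mode: keep using the buffer while it has room. */
	if (q->do_pack && q->elems_used < ELEMS_PER_BUF)
		return 0;

	/* Buffer complete: prime it and advance the ring index. */
	q->elems_used = 0;
	q->next_buf_to_fill = (q->next_buf_to_fill + 1) % RING_SIZE;
	return 1;
}

int main(void)
{
	struct out_queue q = { .do_pack = 1 };
	int i, flushes = 0;

	for (i = 0; i < 100; i++)
		flushes += fill_buffer(&q);

	/* 100 packets at 16 per buffer -> 6 full buffers flushed */
	printf("flushes: %d, next buf: %d\n", flushes, q.next_buf_to_fill);
	return 0;
}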
static int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, @@@ -3990,7 -3950,6 +3958,6 @@@ */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) return -EBUSY; - queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q; qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); qeth_flush_buffers(queue, index, 1); return 0; @@@ -4048,10 -4007,9 +4015,9 @@@ int qeth_do_send_packet(struct qeth_car } } } - tmp = qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); - queue->next_buf_to_fill = (queue->next_buf_to_fill + tmp) % - QDIO_MAX_BUFFERS_PER_Q; - flush_count += tmp; + + flush_count += qeth_fill_buffer(queue, buffer, skb, hdr, offset, + hd_len); if (flush_count) qeth_flush_buffers(queue, start_index, flush_count); else if (!atomic_read(&queue->set_pci_flags_count)) @@@ -5036,7 -4994,6 +5002,7 @@@ static void qeth_core_free_card(struct qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); qeth_clean_channel(&card->data); + destroy_workqueue(card->event_wq); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); dev_set_drvdata(&card->gdev->dev, NULL); @@@ -5163,13 -5120,6 +5129,6 @@@ retriable *carrier_ok = true; }
- if (qeth_netdev_is_registered(card->dev)) { - if (*carrier_ok) - netif_carrier_on(card->dev); - else - netif_carrier_off(card->dev); - } - card->options.ipa4.supported_funcs = 0; card->options.ipa6.supported_funcs = 0; card->options.adp.supported_funcs = 0; @@@ -5946,9 -5896,6 +5905,6 @@@ int qeth_do_ioctl(struct net_device *de if (!card) return -ENODEV;
- if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; - if (card->info.type == QETH_CARD_TYPE_OSN) return -EPERM;
@@@ -6259,8 -6206,6 +6215,6 @@@ int qeth_core_ethtool_get_link_ksetting /* Check if we can obtain more accurate information. */ /* If QUERY_CARD_INFO command is not supported or fails, */ /* just return the heuristics that was filled above. */ - if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; rc = qeth_query_card_info(card, &carrier_info); if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ return 0; @@@ -6543,8 -6488,6 +6497,6 @@@ static int qeth_set_ipa_rx_csum(struct return (rc_ipv6) ? rc_ipv6 : rc_ipv4; }
- #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \ - NETIF_F_IPV6_CSUM | NETIF_F_TSO6) /** * qeth_enable_hw_features() - (Re-)Enable HW functions for device features * @dev: a net_device @@@ -6554,17 -6497,20 +6506,20 @@@ void qeth_enable_hw_features(struct net struct qeth_card *card = dev->ml_priv; netdev_features_t features;
- rtnl_lock(); features = dev->features; - /* force-off any feature that needs an IPA sequence. + /* force-off any feature that might need an IPA sequence. * netdev_update_features() will restart them. */ - dev->features &= ~QETH_HW_FEATURES; + dev->features &= ~dev->hw_features; + /* toggle VLAN filter, so that VIDs are re-programmed: */ + if (IS_LAYER2(card) && IS_VM_NIC(card)) { + dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + dev->wanted_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } netdev_update_features(dev); if (features != dev->features) dev_warn(&card->gdev->dev, "Device recovery failed to restore all offload features\n"); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(qeth_enable_hw_features);
@@@ -6633,10 -6579,7 +6588,7 @@@ netdev_features_t qeth_fix_features(str features &= ~NETIF_F_TSO; if (!qeth_is_supported6(card, IPA_OUTBOUND_TSO)) features &= ~NETIF_F_TSO6; - /* if the card isn't up, remove features that require hw changes */ - if (card->state == CARD_STATE_DOWN || - card->state == CARD_STATE_RECOVER) - features &= ~QETH_HW_FEATURES; + QETH_DBF_HEX(SETUP, 2, &features, sizeof(features)); return features; } @@@ -6668,6 -6611,46 +6620,46 @@@ netdev_features_t qeth_features_check(s } EXPORT_SYMBOL_GPL(qeth_features_check);
+ int qeth_open(struct net_device *dev) + { + struct qeth_card *card = dev->ml_priv; + + QETH_CARD_TEXT(card, 4, "qethopen"); + if (card->state == CARD_STATE_UP) + return 0; + if (card->state != CARD_STATE_SOFTSETUP) + return -ENODEV; + + if (qdio_stop_irq(CARD_DDEV(card), 0) < 0) + return -EIO; + + card->data.state = CH_STATE_UP; + card->state = CARD_STATE_UP; + netif_start_queue(dev); + + napi_enable(&card->napi); + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); + return 0; + } + EXPORT_SYMBOL_GPL(qeth_open); + + int qeth_stop(struct net_device *dev) + { + struct qeth_card *card = dev->ml_priv; + + QETH_CARD_TEXT(card, 4, "qethstop"); + netif_tx_disable(dev); + if (card->state == CARD_STATE_UP) { + card->state = CARD_STATE_SOFTSETUP; + napi_disable(&card->napi); + } + return 0; + } + EXPORT_SYMBOL_GPL(qeth_stop); + static int __init qeth_core_init(void) { int rc; diff --combined drivers/s390/net/qeth_l2_main.c index a43de2f9bcac,82f50cc30b0a..ef0b5eaf2532 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@@ -25,7 -25,6 +25,6 @@@ #include "qeth_l2.h"
static int qeth_l2_set_offline(struct ccwgroup_device *); - static int qeth_l2_stop(struct net_device *); static void qeth_bridgeport_query_support(struct qeth_card *card); static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); @@@ -98,8 -97,7 +97,7 @@@ static int qeth_l2_send_setmac(struct q rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC); if (rc == 0) { dev_info(&card->gdev->dev, - "MAC address %pM successfully registered on device %s\n", - mac, card->dev->name); + "MAC address %pM successfully registered\n", mac); } else { switch (rc) { case -EEXIST: @@@ -263,75 -261,28 +261,28 @@@ static int qeth_l2_send_setdelvlan(stru qeth_l2_send_setdelvlan_cb, NULL)); }
- static void qeth_l2_process_vlans(struct qeth_card *card) - { - struct qeth_vlan_vid *id; - - QETH_CARD_TEXT(card, 3, "L2prcvln"); - mutex_lock(&card->vid_list_mutex); - list_for_each_entry(id, &card->vid_list, list) { - qeth_l2_send_setdelvlan(card, id->vid, IPA_CMD_SETVLAN); - } - mutex_unlock(&card->vid_list_mutex); - } - static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; - struct qeth_vlan_vid *id; - int rc;
QETH_CARD_TEXT_(card, 4, "aid:%d", vid); if (!vid) return 0; - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "aidREC"); - return 0; - } - id = kmalloc(sizeof(*id), GFP_KERNEL); - if (id) { - id->vid = vid; - rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); - if (rc) { - kfree(id); - return rc; - } - mutex_lock(&card->vid_list_mutex); - list_add_tail(&id->list, &card->vid_list); - mutex_unlock(&card->vid_list_mutex); - } else { - return -ENOMEM; - } - return 0; + + return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); }
static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { - struct qeth_vlan_vid *id, *tmpid = NULL; struct qeth_card *card = dev->ml_priv; - int rc = 0;
QETH_CARD_TEXT_(card, 4, "kid:%d", vid); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "kidREC"); + if (!vid) return 0; - } - mutex_lock(&card->vid_list_mutex); - list_for_each_entry(id, &card->vid_list, list) { - if (id->vid == vid) { - list_del(&id->list); - tmpid = id; - break; - } - } - mutex_unlock(&card->vid_list_mutex); - if (tmpid) { - rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); - kfree(tmpid); - } - return rc; + + return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); }
static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode) @@@ -343,9 -294,8 +294,8 @@@ if (card->read.state == CH_STATE_UP && card->write.state == CH_STATE_UP && (card->state == CARD_STATE_UP)) { - if (recovery_mode && - card->info.type != QETH_CARD_TYPE_OSN) { - qeth_l2_stop(card->dev); + if (recovery_mode && !IS_OSN(card)) { + qeth_stop(card->dev); } else { rtnl_lock(); dev_close(card->dev); @@@ -369,8 -319,6 +319,8 @@@ qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); }
static int qeth_l2_process_inbound_buffer(struct qeth_card *card, @@@ -462,6 -410,26 +412,26 @@@ out return 0; }
+ static void qeth_l2_register_dev_addr(struct qeth_card *card) + { + if (!is_valid_ether_addr(card->dev->dev_addr)) + qeth_l2_request_initial_mac(card); + + if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr)) + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; + } + + static int qeth_l2_validate_addr(struct net_device *dev) + { + struct qeth_card *card = dev->ml_priv; + + if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + return eth_validate_addr(dev); + + QETH_CARD_TEXT(card, 4, "nomacadr"); + return -EPERM; + } + static int qeth_l2_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@@ -481,39 -449,22 +451,22 @@@ if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL;
- if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "setmcREC"); - return -ERESTARTSYS; - } - - /* avoid racing against concurrent state change: */ - if (!mutex_trylock(&card->conf_mutex)) - return -EAGAIN; - - if (!qeth_card_hw_is_reachable(card)) { - ether_addr_copy(dev->dev_addr, addr->sa_data); - goto out_unlock; - } - /* don't register the same address twice */ if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) - goto out_unlock; + return 0;
/* add the new address, switch over, drop the old */ rc = qeth_l2_send_setmac(card, addr->sa_data); if (rc) - goto out_unlock; + return rc; ether_addr_copy(old_addr, dev->dev_addr); ether_addr_copy(dev->dev_addr, addr->sa_data);
if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) qeth_l2_remove_mac(card, old_addr); card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; - - out_unlock: - mutex_unlock(&card->conf_mutex); - return rc; + return 0; }
static void qeth_promisc_to_bridge(struct qeth_card *card) @@@ -588,9 -539,6 +541,6 @@@ static void qeth_l2_set_rx_mode(struct return;
QETH_CARD_TEXT(card, 3, "setmulti"); - if (qeth_threads_running(card, QETH_RECOVER_THREAD) && - (card->state != CARD_STATE_UP)) - return;
spin_lock_bh(&card->mclock);
@@@ -714,62 -662,6 +664,6 @@@ tx_drop return NETDEV_TX_OK; }
- static int __qeth_l2_open(struct net_device *dev) - { - struct qeth_card *card = dev->ml_priv; - int rc = 0; - - QETH_CARD_TEXT(card, 4, "qethopen"); - if (card->state == CARD_STATE_UP) - return rc; - if (card->state != CARD_STATE_SOFTSETUP) - return -ENODEV; - - if ((card->info.type != QETH_CARD_TYPE_OSN) && - (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))) { - QETH_CARD_TEXT(card, 4, "nomacadr"); - return -EPERM; - } - card->data.state = CH_STATE_UP; - card->state = CARD_STATE_UP; - netif_start_queue(dev); - - if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { - napi_enable(&card->napi); - local_bh_disable(); - napi_schedule(&card->napi); - /* kick-start the NAPI softirq: */ - local_bh_enable(); - } else - rc = -EIO; - return rc; - } - - static int qeth_l2_open(struct net_device *dev) - { - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "qethope_"); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "openREC"); - return -ERESTARTSYS; - } - return __qeth_l2_open(dev); - } - - static int qeth_l2_stop(struct net_device *dev) - { - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 4, "qethstop"); - netif_tx_disable(dev); - if (card->state == CARD_STATE_UP) { - card->state = CARD_STATE_SOFTSETUP; - napi_disable(&card->napi); - } - return 0; - } - static const struct device_type qeth_l2_devtype = { .name = "qeth_layer2", .groups = qeth_l2_attr_groups, @@@ -785,7 -677,7 +679,7 @@@ static int qeth_l2_probe_device(struct if (rc) return rc; } - INIT_LIST_HEAD(&card->vid_list); + hash_init(card->mac_htable); card->info.hwtrap = 0; qeth_l2_vnicc_set_defaults(card); @@@ -803,8 -695,6 +697,8 @@@ static void qeth_l2_remove_device(struc
if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); + + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); } @@@ -826,12 -716,12 +720,12 @@@ static const struct ethtool_ops qeth_l2 };
static const struct net_device_ops qeth_l2_netdev_ops = { - .ndo_open = qeth_l2_open, - .ndo_stop = qeth_l2_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, .ndo_features_check = qeth_features_check, - .ndo_validate_addr = eth_validate_addr, + .ndo_validate_addr = qeth_l2_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_do_ioctl, .ndo_set_mac_address = qeth_l2_set_mac_address, @@@ -846,9 -736,6 +740,6 @@@ static int qeth_l2_setup_netdev(struct { int rc;
- if (qeth_netdev_is_registered(card->dev)) - return 0; - card->dev->priv_flags |= IFF_UNICAST_FLT; card->dev->netdev_ops = &qeth_l2_netdev_ops; if (card->info.type == QETH_CARD_TYPE_OSN) { @@@ -859,10 -746,13 +750,13 @@@ card->dev->needed_headroom = sizeof(struct qeth_hdr); }
- if (card->info.type == QETH_CARD_TYPE_OSM) + if (IS_OSM(card)) { card->dev->features |= NETIF_F_VLAN_CHALLENGED; - else + } else { + if (!IS_VM_NIC(card)) + card->dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + }
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->features |= NETIF_F_SG; @@@ -896,8 -786,6 +790,6 @@@ PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); }
- if (!is_valid_ether_addr(card->dev->dev_addr)) - qeth_l2_request_initial_mac(card); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); if (!rc && carrier_ok) @@@ -931,6 -819,7 +823,7 @@@ static void qeth_l2_trace_features(stru static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct net_device *dev = card->dev; int rc = 0; enum qeth_card_states recover_flag; bool carrier_ok; @@@ -952,13 -841,7 +845,7 @@@ dev_info(&card->gdev->dev, "The device represents a Bridge Capable Port\n");
- rc = qeth_l2_setup_netdev(card, carrier_ok); - if (rc) - goto out_remove; - - if (card->info.type != QETH_CARD_TYPE_OSN && - !qeth_l2_send_setmac(card, card->dev->dev_addr)) - card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; + qeth_l2_register_dev_addr(card);
if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && @@@ -988,11 -871,6 +875,6 @@@ goto out_remove; }
- if (card->info.type != QETH_CARD_TYPE_OSN) - qeth_l2_process_vlans(card); - - netif_tx_disable(card->dev); - rc = qeth_init_qdio_queues(card); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc); @@@ -1003,17 -881,31 +885,31 @@@
qeth_set_allowed_threads(card, 0xffffffff, 0);
- qeth_enable_hw_features(card->dev); - if (recover_flag == CARD_STATE_RECOVER) { - if (recovery_mode && - card->info.type != QETH_CARD_TYPE_OSN) { - __qeth_l2_open(card->dev); - qeth_l2_set_rx_mode(card->dev); - } else { - rtnl_lock(); - dev_open(card->dev, NULL); - rtnl_unlock(); + if (!qeth_netdev_is_registered(dev)) { + rc = qeth_l2_setup_netdev(card, carrier_ok); + if (rc) + goto out_remove; + } else { + rtnl_lock(); + if (carrier_ok) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + netif_device_attach(dev); + qeth_enable_hw_features(dev); + + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode && !IS_OSN(card)) { + if (!qeth_l2_validate_addr(dev)) { + qeth_open(dev); + qeth_l2_set_rx_mode(dev); + } + } else { + dev_open(dev, NULL); + } } + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); @@@ -1053,7 -945,11 +949,11 @@@ static int __qeth_l2_set_offline(struc QETH_DBF_TEXT(SETUP, 3, "setoffl"); QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
+ rtnl_lock(); + netif_device_detach(card->dev); netif_carrier_off(card->dev); + rtnl_unlock(); + recover_flag = card->state; if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) { qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); @@@ -1094,7 -990,6 +994,6 @@@ static int qeth_l2_recover(void *ptr QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_recovery_task(card); __qeth_l2_set_offline(card->gdev, 1); rc = __qeth_l2_set_online(card->gdev, 1); if (!rc) @@@ -1105,7 -1000,6 +1004,6 @@@ dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } - qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; @@@ -1126,7 -1020,6 +1024,6 @@@ static int qeth_l2_pm_suspend(struct cc { struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- netif_device_detach(card->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_OFFLINE) @@@ -1156,7 -1049,6 +1053,6 @@@ static int qeth_l2_pm_resume(struct ccw rc = __qeth_l2_set_online(card->gdev, 0);
qeth_set_allowed_threads(card, 0xffffffff, 0); - netif_device_attach(card->dev); if (rc) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); @@@ -1438,7 -1330,7 +1334,7 @@@ static void qeth_bridge_state_change(st data->card = card; memcpy(&data->qports, qports, sizeof(struct qeth_sbp_state_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); }
struct qeth_bridge_host_data { @@@ -1510,7 -1402,7 +1406,7 @@@ static void qeth_bridge_host_event(stru data->card = card; memcpy(&data->hostevs, hostevs, sizeof(struct qeth_ipacmd_addr_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); }
/* SETBRIDGEPORT support; sending commands */ diff --combined drivers/s390/net/qeth_l3_main.c index df34bff4ac31,59535ecb1487..f7d0623999ba --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@@ -40,7 -40,6 +40,6 @@@
static int qeth_l3_set_offline(struct ccwgroup_device *); - static int qeth_l3_stop(struct net_device *); static void qeth_l3_set_rx_mode(struct net_device *dev); static int qeth_l3_register_addr_entry(struct qeth_card *, struct qeth_ipaddr *); @@@ -1281,10 -1280,6 +1280,6 @@@ static int qeth_l3_vlan_rx_kill_vid(str
QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
- if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "kidREC"); - return 0; - } clear_bit(vid, card->active_vlans); qeth_l3_set_rx_mode(dev); return 0; @@@ -1410,7 -1405,7 +1405,7 @@@ static void qeth_l3_stop_card(struct qe card->write.state == CH_STATE_UP && (card->state == CARD_STATE_UP)) { if (recovery_mode) - qeth_l3_stop(card->dev); + qeth_stop(card->dev); else { rtnl_lock(); dev_close(card->dev); @@@ -1433,8 -1428,6 +1428,8 @@@ qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); }
/* @@@ -1475,9 -1468,7 +1470,7 @@@ static void qeth_l3_set_rx_mode(struct int i, rc;
QETH_CARD_TEXT(card, 3, "setmulti"); - if (qeth_threads_running(card, QETH_RECOVER_THREAD) && - (card->state != CARD_STATE_UP)) - return; + if (!card->options.sniffer) { spin_lock_bh(&card->mclock);
@@@ -2102,56 -2093,6 +2095,6 @@@ tx_drop return NETDEV_TX_OK; }
- static int __qeth_l3_open(struct net_device *dev) - { - struct qeth_card *card = dev->ml_priv; - int rc = 0; - - QETH_CARD_TEXT(card, 4, "qethopen"); - if (card->state == CARD_STATE_UP) - return rc; - if (card->state != CARD_STATE_SOFTSETUP) - return -ENODEV; - card->data.state = CH_STATE_UP; - card->state = CARD_STATE_UP; - netif_start_queue(dev); - - if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { - napi_enable(&card->napi); - local_bh_disable(); - napi_schedule(&card->napi); - /* kick-start the NAPI softirq: */ - local_bh_enable(); - } else - rc = -EIO; - return rc; - } - - static int qeth_l3_open(struct net_device *dev) - { - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "qethope_"); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "openREC"); - return -ERESTARTSYS; - } - return __qeth_l3_open(dev); - } - - static int qeth_l3_stop(struct net_device *dev) - { - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 4, "qethstop"); - netif_tx_disable(dev); - if (card->state == CARD_STATE_UP) { - card->state = CARD_STATE_SOFTSETUP; - napi_disable(&card->napi); - } - return 0; - } - static const struct ethtool_ops qeth_l3_ethtool_ops = { .get_link = ethtool_op_get_link, .get_strings = qeth_core_get_strings, @@@ -2195,8 -2136,8 +2138,8 @@@ static netdev_features_t qeth_l3_osa_fe }
static const struct net_device_ops qeth_l3_netdev_ops = { - .ndo_open = qeth_l3_open, - .ndo_stop = qeth_l3_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, @@@ -2210,8 -2151,8 +2153,8 @@@ };
static const struct net_device_ops qeth_l3_osa_netdev_ops = { - .ndo_open = qeth_l3_open, - .ndo_stop = qeth_l3_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_features_check = qeth_l3_osa_features_check, @@@ -2231,9 -2172,6 +2174,6 @@@ static int qeth_l3_setup_netdev(struct unsigned int headroom; int rc;
- if (qeth_netdev_is_registered(card->dev)) - return 0; - if (card->info.type == QETH_CARD_TYPE_OSD || card->info.type == QETH_CARD_TYPE_OSX) { if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) || @@@ -2340,7 -2278,6 +2280,7 @@@ static void qeth_l3_remove_device(struc if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev);
+ cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0); @@@ -2350,6 -2287,7 +2290,7 @@@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct net_device *dev = card->dev; int rc = 0; enum qeth_card_states recover_flag; bool carrier_ok; @@@ -2367,10 -2305,6 +2308,6 @@@ goto out_remove; }
- rc = qeth_l3_setup_netdev(card, carrier_ok); - if (rc) - goto out_remove; - if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM)) @@@ -2400,7 -2334,6 +2337,6 @@@ if (rc) QETH_DBF_TEXT_(SETUP, 2, "5err%04x", rc); } - netif_tx_disable(card->dev);
rc = qeth_init_qdio_queues(card); if (rc) { @@@ -2413,14 -2346,27 +2349,27 @@@ qeth_set_allowed_threads(card, 0xffffffff, 0); qeth_l3_recover_ip(card);
- qeth_enable_hw_features(card->dev); - if (recover_flag == CARD_STATE_RECOVER) { + if (!qeth_netdev_is_registered(dev)) { + rc = qeth_l3_setup_netdev(card, carrier_ok); + if (rc) + goto out_remove; + } else { rtnl_lock(); - if (recovery_mode) { - __qeth_l3_open(card->dev); - qeth_l3_set_rx_mode(card->dev); - } else { - dev_open(card->dev, NULL); + if (carrier_ok) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + netif_device_attach(dev); + qeth_enable_hw_features(dev); + + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode) { + qeth_open(dev); + qeth_l3_set_rx_mode(dev); + } else { + dev_open(dev, NULL); + } } rtnl_unlock(); } @@@ -2462,7 -2408,11 +2411,11 @@@ static int __qeth_l3_set_offline(struc QETH_DBF_TEXT(SETUP, 3, "setoffl"); QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
+ rtnl_lock(); + netif_device_detach(card->dev); netif_carrier_off(card->dev); + rtnl_unlock(); + recover_flag = card->state; if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) { qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); @@@ -2509,7 -2459,6 +2462,6 @@@ static int qeth_l3_recover(void *ptr QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_recovery_task(card); __qeth_l3_set_offline(card->gdev, 1); rc = __qeth_l3_set_online(card->gdev, 1); if (!rc) @@@ -2520,7 -2469,6 +2472,6 @@@ dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } - qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; @@@ -2530,7 -2478,6 +2481,6 @@@ static int qeth_l3_pm_suspend(struct cc { struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- netif_device_detach(card->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_OFFLINE) @@@ -2560,7 -2507,6 +2510,6 @@@ static int qeth_l3_pm_resume(struct ccw rc = __qeth_l3_set_online(card->gdev, 0);
qeth_set_allowed_threads(card, 0xffffffff, 0); - netif_device_attach(card->dev); if (rc) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); diff --combined include/linux/filter.h index e532fcc6e4b5,7317376734f7..95e2d7ebdf21 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@@ -277,6 -277,26 +277,26 @@@ struct sock_reuseport .off = OFF, \ .imm = IMM })
+ /* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + + #define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + + /* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + + #define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */
#define BPF_JMP_A(OFF) \ @@@ -591,8 -611,8 +611,8 @@@ static inline u8 *bpf_skb_cb(struct sk_ return qdisc_skb_cb(skb)->data; }
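As an illustration (not part of the diff above), the new BPF_JMP32_* encodings compose with the existing insn macros from this header roughly like so; BPF_MOV64_IMM and BPF_EXIT_INSN are the long-standing helpers, and the program itself is invented for the example:

    /* R0 = 1 iff the low 32 bits of R1 equal 7, else R0 = 0 */
    struct bpf_insn prog[] = {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        /* unlike BPF_JMP_IMM, only the low 32 bits of R1 are compared */
        BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 7, 1),
        BPF_MOV64_IMM(BPF_REG_0, 1),
        BPF_EXIT_INSN(),
    };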
-static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, - struct sk_buff *skb) +static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; @@@ -611,30 -631,15 +631,30 @@@ return res; }
+static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u32 res; + + preempt_disable(); + res = __bpf_prog_run_save_cb(prog, skb); + preempt_enable(); + return res; +} + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); + u32 res;
if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN);
- return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; }
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, @@@ -793,6 -798,7 +813,7 @@@ static inline bool bpf_dump_raw_ok(void
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
void bpf_clear_redirect_map(struct bpf_map *map);
@@@ -874,7 -880,9 +895,9 @@@ bpf_jit_binary_alloc(unsigned int progl unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); - + u64 bpf_jit_alloc_exec_limit(void); + void *bpf_jit_alloc_exec(unsigned long size); + void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp);
int bpf_jit_get_func_addr(const struct bpf_prog *prog, diff --combined include/linux/netdevice.h index 86dbb3e29139,ba57d0ba425e..b6c6d2fe17b0 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -630,6 -630,7 +630,7 @@@ struct netdev_queue } ____cacheline_aligned_in_smp;
extern int sysctl_fb_tunnels_only_for_init_net; + extern int sysctl_devconf_inherit_init_net;
static inline bool net_has_fallback_tunnels(const struct net *net) { @@@ -1152,7 -1153,8 +1153,8 @@@ struct dev_ifalias * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid, u16 flags) + * const unsigned char *addr, u16 vid, u16 flags, + * struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, @@@ -1376,7 -1378,8 +1378,8 @@@ struct net_device_ops struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@@ -1483,7 -1486,6 +1486,7 @@@ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@@ -1515,7 -1517,6 +1518,7 @@@ IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, };
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@@ -1546,7 -1547,6 +1549,7 @@@ #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE +#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
/** * struct net_device - The DEVICE structure. @@@ -4552,11 -4552,6 +4555,11 @@@ static inline bool netif_supports_nofcs return dev->priv_flags & IFF_SUPP_NOFCS; }
+static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; @@@ -4668,22 -4663,22 +4671,22 @@@ static inline const char *netdev_reg_st return " (unknown)"; }
- __printf(3, 4) + __printf(3, 4) __cold void netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_emerg(const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_alert(const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_crit(const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_err(const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_warn(const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_notice(const struct net_device *dev, const char *format, ...); - __printf(2, 3) + __printf(2, 3) __cold void netdev_info(const struct net_device *dev, const char *format, ...);
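The __cold annotations added to the netdev_printk() family are the usual compiler hint; the definition is approximately the following (see include/linux/compiler_attributes.h), telling the optimizer that calls are unlikely so both the callers and the function body can be optimized off the hot path:

    #define __cold __attribute__((__cold__))

Message-printing paths are a natural fit, since they mostly fire on error conditions.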
#define netdev_level_once(level, dev, fmt, ...) \ diff --combined include/linux/skbuff.h index 19c1793ed192,831846617d07..43c62554f848 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -1221,6 -1221,11 +1221,11 @@@ static inline int skb_flow_dissector_bp } #endif
+ struct bpf_flow_keys; + bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys); bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, @@@ -3481,16 -3486,25 +3486,25 @@@ static inline ktime_t skb_get_ktime(con /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from - * @stamp: pointer to struct timeval to store stamp in + * @stamp: pointer to struct __kernel_old_timeval to store stamp in * * Timestamps are stored in the skb as offsets to a base timestamp. * This function converts the offset back to a struct timeval and stores * it in stamp. */ static inline void skb_get_timestamp(const struct sk_buff *skb, - struct timeval *stamp) + struct __kernel_old_timeval *stamp) { - *stamp = ktime_to_timeval(skb->tstamp); + *stamp = ns_to_kernel_old_timeval(skb->tstamp); + } + + static inline void skb_get_new_timestamp(const struct sk_buff *skb, + struct __kernel_sock_timeval *stamp) + { + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_usec = ts.tv_nsec / 1000; }
static inline void skb_get_timestampns(const struct sk_buff *skb, @@@ -3499,6 -3513,15 +3513,15 @@@ *stamp = ktime_to_timespec(skb->tstamp); }
+ static inline void skb_get_new_timestampns(const struct sk_buff *skb, + struct __kernel_timespec *stamp) + { + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_nsec = ts.tv_nsec; + } + static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); @@@ -4297,7 -4320,7 +4320,7 @@@ static inline bool skb_head_is_locked(c /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. - * See Documentation/networking/checksum-offloads.txt for + * See Documentation/networking/checksum-offloads.rst for * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. diff --combined include/net/netfilter/nf_tables.h index b4984bbbe157,45eba7d7ab38..a66fcd316734 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -469,7 -469,9 +469,7 @@@ struct nft_set_binding int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); + struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
/** @@@ -719,13 -721,6 +719,13 @@@ struct nft_expr_type #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2
+enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE +}; + /** * struct nft_expr_ops - nf_tables expression operations * @@@ -755,8 -750,7 +755,8 @@@ struct nft_expr_ops void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, - const struct nft_expr *expr); + const struct nft_expr *expr, + enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, @@@ -1018,21 -1012,32 +1018,32 @@@ int nft_verdict_dump(struct sk_buff *sk const struct nft_verdict *v);
/** + * struct nft_object_hash_key - key to lookup nft_object + * + * @name: name of the stateful object to look up + * @table: table the object belongs to + */ + struct nft_object_hash_key { + const char *name; + const struct nft_table *table; + }; + + /** * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @table: table this object belongs to - * @name: name of this stateful object + * @key: keys that identify this object + * @rhlhead: nft_objname_ht node * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle * @ops: object operations - * @data: object data, layout depends on type + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; - char *name; - struct nft_table *table; + struct rhlist_head rhlhead; + struct nft_object_hash_key key; u32 genmask:2, use:30; u64 handle; @@@ -1049,11 -1054,12 +1060,12 @@@ static inline void *nft_obj_data(const
#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
- struct nft_object *nft_obj_lookup(const struct nft_table *table, + struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask);
- void nft_obj_notify(struct net *net, struct nft_table *table, + void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp);
@@@ -1329,15 -1335,12 +1341,15 @@@ struct nft_trans_rule struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; };
#define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound)
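Schematically, the new bound flag is meant to be consumed on the transaction abort path, roughly as in the sketch below (not a hunk from this diff; nft_set_destroy() is the existing destructor):

    /* a set that was bound to a rule is torn down together with the
     * rule on abort, so only destroy it here if it never got bound
     */
    if (!nft_trans_set_bound(trans))
        nft_set_destroy(nft_trans_set(trans));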
struct nft_trans_chain { bool update; diff --combined kernel/bpf/btf.c index c57bd10340ed,7019c1f05cab..bd3921b1514b --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@@ -157,7 -157,7 +157,7 @@@ * */
- #define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE) + #define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2) #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) @@@ -355,6 -355,11 +355,11 @@@ static bool btf_type_is_struct(const st return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; }
+ static bool __btf_type_is_struct(const struct btf_type *t) + { + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; + } + static bool btf_type_is_array(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; @@@ -525,7 -530,7 +530,7 @@@ const struct btf_type *btf_type_by_id(c
/* * Regular int is not a bit field and it must be either - * u8/u16/u32/u64. + * u8/u16/u32/u64 or __int128. */ static bool btf_type_int_is_regular(const struct btf_type *t) { @@@ -538,7 -543,8 +543,8 @@@ if (BITS_PER_BYTE_MASKED(nr_bits) || BTF_INT_OFFSET(int_data) || (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && - nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) { + nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && + nr_bytes != (2 * sizeof(u64)))) { return false; }
@@@ -1063,9 -1069,9 +1069,9 @@@ static int btf_int_check_member(struct nr_copy_bits = BTF_INT_BITS(int_data) + BITS_PER_BYTE_MASKED(struct_bits_off);
- if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; }
@@@ -1119,9 -1125,9 +1125,9 @@@ static int btf_int_check_kflag_member(s
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); - if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; }
@@@ -1168,9 -1174,9 +1174,9 @@@ static s32 btf_int_check_meta(struct bt
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
- if (nr_bits > BITS_PER_U64) { + if (nr_bits > BITS_PER_U128) { btf_verifier_log_type(env, t, "nr_bits exceeds %zu", - BITS_PER_U64); + BITS_PER_U128); return -EINVAL; }
@@@ -1211,31 -1217,93 +1217,93 @@@ static void btf_int_log(struct btf_veri btf_int_encoding_str(BTF_INT_ENCODING(int_data))); }
+ static void btf_int128_print(struct seq_file *m, void *data) + { + /* data points to a __int128 number. + * Suppose + * int128_num = *(__int128 *)data; + * The below formulas shows what upper_num and lower_num represents: + * upper_num = int128_num >> 64; + * lower_num = int128_num & 0xffffffffFFFFFFFFULL; + */ + u64 upper_num, lower_num; + + #ifdef __BIG_ENDIAN_BITFIELD + upper_num = *(u64 *)data; + lower_num = *(u64 *)(data + 8); + #else + upper_num = *(u64 *)(data + 8); + lower_num = *(u64 *)data; + #endif + if (upper_num == 0) + seq_printf(m, "0x%llx", lower_num); + else + seq_printf(m, "0x%llx%016llx", upper_num, lower_num); + } + + static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, + u16 right_shift_bits) + { + u64 upper_num, lower_num; + + #ifdef __BIG_ENDIAN_BITFIELD + upper_num = print_num[0]; + lower_num = print_num[1]; + #else + upper_num = print_num[1]; + lower_num = print_num[0]; + #endif + + /* shake out un-needed bits by shift/or operations */ + if (left_shift_bits >= 64) { + upper_num = lower_num << (left_shift_bits - 64); + lower_num = 0; + } else { + upper_num = (upper_num << left_shift_bits) | + (lower_num >> (64 - left_shift_bits)); + lower_num = lower_num << left_shift_bits; + } + + if (right_shift_bits >= 64) { + lower_num = upper_num >> (right_shift_bits - 64); + upper_num = 0; + } else { + lower_num = (lower_num >> right_shift_bits) | + (upper_num << (64 - right_shift_bits)); + upper_num = upper_num >> right_shift_bits; + } + + #ifdef __BIG_ENDIAN_BITFIELD + print_num[0] = upper_num; + print_num[1] = lower_num; + #else + print_num[0] = lower_num; + print_num[1] = upper_num; + #endif + } + static void btf_bitfield_seq_show(void *data, u8 bits_offset, u8 nr_bits, struct seq_file *m) { u16 left_shift_bits, right_shift_bits; u8 nr_copy_bytes; u8 nr_copy_bits; - u64 print_num; + u64 print_num[2] = {};
nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
- print_num = 0; - memcpy(&print_num, data, nr_copy_bytes); + memcpy(print_num, data, nr_copy_bytes);
#ifdef __BIG_ENDIAN_BITFIELD left_shift_bits = bits_offset; #else - left_shift_bits = BITS_PER_U64 - nr_copy_bits; + left_shift_bits = BITS_PER_U128 - nr_copy_bits; #endif - right_shift_bits = BITS_PER_U64 - nr_bits; + right_shift_bits = BITS_PER_U128 - nr_bits;
- print_num <<= left_shift_bits; - print_num >>= right_shift_bits; - - seq_printf(m, "0x%llx", print_num); + btf_int128_shift(print_num, left_shift_bits, right_shift_bits); + btf_int128_print(m, print_num); }
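A self-contained userspace rendering of the same two-word shift may make the carry handling easier to follow; little-endian word order is assumed (v[0] holds the low 64 bits), and the zero-shift corner is guarded explicitly here for strict C correctness:

    #include <stdint.h>

    static void shift128(uint64_t v[2], unsigned int left, unsigned int right)
    {
        uint64_t hi = v[1], lo = v[0];

        if (left >= 64) {                     /* whole-word left shift */
            hi = lo << (left - 64);
            lo = 0;
        } else if (left) {
            hi = (hi << left) | (lo >> (64 - left));
            lo <<= left;
        }

        if (right >= 64) {                    /* whole-word right shift */
            lo = hi >> (right - 64);
            hi = 0;
        } else if (right) {
            lo = (lo >> right) | (hi << (64 - right));
            hi >>= right;
        }

        v[0] = lo;
        v[1] = hi;
    }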
@@@ -1250,7 -1318,7 +1318,7 @@@ static void btf_int_bits_seq_show(cons
/* * bits_offset is at most 7. - * BTF_INT_OFFSET() cannot exceed 64 bits. + * BTF_INT_OFFSET() cannot exceed 128 bits. */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); @@@ -1274,6 -1342,9 +1342,9 @@@ static void btf_int_seq_show(const stru }
switch (nr_bits) { + case 128: + btf_int128_print(m, data); + break; case 64: if (sign) seq_printf(m, "%lld", *(s64 *)data); @@@ -1459,8 -1530,7 +1530,8 @@@ static int btf_modifier_resolve(struct
/* "typedef void new_void", "const void"...etc */ if (!btf_type_is_void(next_type) && - !btf_type_is_fwd(next_type)) { + !btf_type_is_fwd(next_type) && + !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@@ -1980,6 -2050,43 +2051,43 @@@ static void btf_struct_log(struct btf_v btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); }
+ /* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ + int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) + { + const struct btf_member *member; + u32 i, off = -ENOENT; + + if (!__btf_type_is_struct(t)) + return -EINVAL; + + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + if (!__btf_type_is_struct(member_type)) + continue; + if (member_type->size != sizeof(struct bpf_spin_lock)) + continue; + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), + "bpf_spin_lock")) + continue; + if (off != -ENOENT) + /* only one 'struct bpf_spin_lock' is allowed */ + return -E2BIG; + off = btf_member_bit_offset(t, member); + if (off % 8) + /* valid C code cannot generate such BTF */ + return -EINVAL; + off /= 8; + if (off % __alignof__(struct bpf_spin_lock)) + /* valid struct bpf_spin_lock will be 4 byte aligned */ + return -EINVAL; + } + return off; + } + static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) diff --combined kernel/bpf/cgroup.c index d17d05570a3f,d78cfec5807d..4e807973aa80 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@@ -230,6 -230,7 +230,7 @@@ cleanup * @cgrp: The cgroup which descendants to traverse * @prog: A program to attach * @type: Type of attach operation + * @flags: Option flags * * Must be called with cgroup_mutex held. */ @@@ -363,7 -364,7 +364,7 @@@ cleanup * Must be called with cgroup_mutex held. */ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 unused_flags) + enum bpf_attach_type type) { struct list_head *progs = &cgrp->bpf.progs[type]; enum bpf_cgroup_storage_type stype; @@@ -572,7 -573,7 +573,7 @@@ int __cgroup_bpf_run_filter_skb(struct bpf_compute_and_save_data_end(skb, &saved_data_end);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; diff --combined kernel/bpf/hashtab.c index f9274114c88d,937776531998..fed15cf94dca --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@@ -686,7 -686,7 +686,7 @@@ static void free_htab_elem(struct bpf_h }
if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@@ -718,21 -718,12 +718,12 @@@ static bool fd_htab_map_needs_adjust(co BITS_PER_LONG == 64; }
- static u32 htab_size_value(const struct bpf_htab *htab, bool percpu) - { - u32 size = htab->map.value_size; - - if (percpu || fd_htab_map_needs_adjust(htab)) - size = round_up(size, 8); - return size; - } - static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, struct htab_elem *old_elem) { - u32 size = htab_size_value(htab, percpu); + u32 size = htab->map.value_size; bool prealloc = htab_is_prealloc(htab); struct htab_elem *l_new, **pl_new; void __percpu *pptr; @@@ -748,7 -739,7 +739,7 @@@ } else { struct pcpu_freelist_node *l;
- l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); @@@ -770,10 -761,13 +761,13 @@@ l_new = ERR_PTR(-ENOMEM); goto dec_count; } + check_and_init_map_lock(&htab->map, + l_new->key + round_up(key_size, 8)); }
memcpy(l_new->key, key, key_size); if (percpu) { + size = round_up(size, 8); if (prealloc) { pptr = htab_elem_get_ptr(l_new, key_size); } else { @@@ -791,8 -785,13 +785,13 @@@
if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); - } else { + } else if (fd_htab_map_needs_adjust(htab)) { + size = round_up(size, 8); memcpy(l_new->key + round_up(key_size, 8), value, size); + } else { + copy_map_value(&htab->map, + l_new->key + round_up(key_size, 8), + value); }
l_new->hash = hash; @@@ -805,11 -804,11 +804,11 @@@ dec_count static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, u64 map_flags) { - if (l_old && map_flags == BPF_NOEXIST) + if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) /* elem already exists */ return -EEXIST;
- if (!l_old && map_flags == BPF_EXIST) + if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) /* elem doesn't exist, cannot update it */ return -ENOENT;
@@@ -828,7 -827,7 +827,7 @@@ static int htab_map_update_elem(struct u32 key_size, hash; int ret;
- if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL;
@@@ -841,6 -840,28 +840,28 @@@ b = __select_bucket(htab, hash); head = &b->head;
+ if (unlikely(map_flags & BPF_F_LOCK)) { + if (unlikely(!map_value_has_spin_lock(map))) + return -EINVAL; + /* find an element without taking the bucket lock */ + l_old = lookup_nulls_elem_raw(head, hash, key, key_size, + htab->n_buckets); + ret = check_flags(htab, l_old, map_flags); + if (ret) + return ret; + if (l_old) { + /* grab the element lock and update value in place */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + return 0; + } + /* fall through, grab the bucket lock and lookup again. + * 99.9% chance that the element won't be found, + * but second lookup under lock has to be done. + */ + } + /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&b->lock, flags);
@@@ -850,6 -871,20 +871,20 @@@ if (ret) goto err;
+ if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { + /* first lookup without the bucket lock didn't find the element, + * but second lookup with the bucket lock found it. + * This case is highly unlikely, but has to be dealt with: + * grab the element lock in addition to the bucket lock + * and update element in place + */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + ret = 0; + goto err; + } + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, l_old); if (IS_ERR(l_new)) { diff --combined kernel/bpf/syscall.c index 8577bb7f8be6,0834958f1dc4..ec7c552af76b --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@@ -463,7 -463,7 +463,7 @@@ int map_check_no_btf(const struct bpf_m return -ENOTSUPP; }
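For orientation, the program-side pattern that this plumbing (btf_find_spin_lock(), BPF_F_LOCK, copy_map_value_locked()) supports looks roughly like the sketch below. The struct and map names are hypothetical; bpf_spin_lock()/bpf_spin_unlock() are the helpers introduced by this series, and per map_check_btf() in the syscall.c hunk below only hash, array and cgroup-storage maps may carry the lock:

    /* map value with an embedded lock; btf_find_spin_lock() locates
     * the field from the map's BTF at map-creation time
     */
    struct hash_elem {
        struct bpf_spin_lock lock;
        long cnt;
    };

    /* inside a BPF program, after a map lookup: */
    struct hash_elem *val = bpf_map_lookup_elem(&hash_map, &key);
    if (val) {
        bpf_spin_lock(&val->lock);
        val->cnt++;
        bpf_spin_unlock(&val->lock);
    }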
- static int map_check_btf(const struct bpf_map *map, const struct btf *btf, + static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; @@@ -478,6 -478,22 +478,22 @@@ if (!value_type || value_size != map->value_size) return -EINVAL;
+ map->spin_lock_off = btf_find_spin_lock(btf, value_type); + + if (map_value_has_spin_lock(map)) { + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY && + map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) + return -ENOTSUPP; + if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > + map->value_size) { + WARN_ONCE(1, + "verifier bug spin_lock_off %d value_size %d\n", + map->spin_lock_off, map->value_size); + return -EFAULT; + } + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type);
@@@ -542,6 -558,8 +558,8 @@@ static int map_create(union bpf_attr *a map->btf = btf; map->btf_key_type_id = attr->btf_key_type_id; map->btf_value_type_id = attr->btf_value_type_id; + } else { + map->spin_lock_off = -EINVAL; }
err = security_bpf_map_alloc(map); @@@ -664,7 -682,7 +682,7 @@@ static void *__bpf_copy_key(void __use }
/* last field in 'union bpf_attr' used by this command */ - #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value + #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
static int map_lookup_elem(union bpf_attr *attr) { @@@ -680,6 -698,9 +698,9 @@@ if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL;
+ if (attr->flags & ~BPF_F_LOCK) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) @@@ -690,6 -711,12 +711,12 @@@ goto err_put; }
+ if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@@ -713,13 -740,8 +740,13 @@@
if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@@ -745,14 -767,17 +772,20 @@@ err = -ENOENT; } else { err = 0; - memcpy(value, ptr, value_size); + if (attr->flags & BPF_F_LOCK) + /* lock 'ptr' and copy everything but lock */ + copy_map_value_locked(map, value, ptr, true); + else + copy_map_value(map, value, ptr); + /* mask lock, since value wasn't zero inited */ + check_and_init_map_lock(map, value); } rcu_read_unlock(); } + this_cpu_dec(bpf_prog_active); + preempt_enable();
+done: if (err) goto free_value;
@@@ -808,6 -833,12 +841,12 @@@ static int map_update_elem(union bpf_at goto err_put; }
+ if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); diff --combined kernel/cgroup/cgroup.c index 747e5b17f9da,9f617605dacb..19da0ab89a0c --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@@ -54,7 -54,6 +54,7 @@@ #include <linux/proc_ns.h> #include <linux/nsproxy.h> #include <linux/file.h> +#include <linux/fs_parser.h> #include <linux/sched/cputime.h> #include <linux/psi.h> #include <net/sock.h> @@@ -1773,37 -1772,26 +1773,37 @@@ int cgroup_show_path(struct seq_file *s return len; }
-static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) -{ - char *token; +enum cgroup2_param { + Opt_nsdelegate, + nr__cgroup2_params +};
- *root_flags = 0; +static const struct fs_parameter_spec cgroup2_param_specs[] = { + fsparam_flag ("nsdelegate", Opt_nsdelegate), + {} +};
- if (!data || *data == '\0') - return 0; +static const struct fs_parameter_description cgroup2_fs_parameters = { + .name = "cgroup2", + .specs = cgroup2_param_specs, +};
- while ((token = strsep(&data, ",")) != NULL) { - if (!strcmp(token, "nsdelegate")) { - *root_flags |= CGRP_ROOT_NS_DELEGATE; - continue; - } +static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt;
- pr_err("cgroup2: unknown option "%s"\n", token); - return -EINVAL; - } + opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result); + if (opt < 0) + return opt;
- return 0; + switch (opt) { + case Opt_nsdelegate: + ctx->flags |= CGRP_ROOT_NS_DELEGATE; + return 0; + } + return -EINVAL; }
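For userspace nothing changes observably: the legacy mount(2) path still hands each option string to the parser above, e.g. (illustrative snippet; the target path is arbitrary):

    #include <stdio.h>
    #include <sys/mount.h>

    /* "nsdelegate" arrives in cgroup2_parse_param() as Opt_nsdelegate */
    if (mount("none", "/sys/fs/cgroup", "cgroup2", 0, "nsdelegate"))
        perror("mount cgroup2");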
static void apply_cgroup_root_flags(unsigned int root_flags) @@@ -1823,11 -1811,16 +1823,11 @@@ static int cgroup_show_options(struct s return 0; }
-static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) +static int cgroup_reconfigure(struct fs_context *fc) { - unsigned int root_flags; - int ret; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
- ret = parse_cgroup_root_flags(data, &root_flags); - if (ret) - return ret; - - apply_cgroup_root_flags(root_flags); + apply_cgroup_root_flags(ctx->flags); return 0; }
@@@ -1915,9 -1908,8 +1915,9 @@@ static void init_cgroup_housekeeping(st INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); }
-void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) +void init_cgroup_root(struct cgroup_fs_context *ctx) { + struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp;
INIT_LIST_HEAD(&root->root_list); @@@ -1926,16 -1918,16 +1926,16 @@@ init_cgroup_housekeeping(cgrp); idr_init(&root->cgroup_idr);
- root->flags = opts->flags; - if (opts->release_agent) - strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); - if (opts->name) - strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); - if (opts->cpuset_clone_children) + root->flags = ctx->flags; + if (ctx->release_agent) + strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); + if (ctx->name) + strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN); + if (ctx->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); }
-int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@@ -1952,7 -1944,7 +1952,7 @@@ root_cgrp->ancestor_ids[0] = ret;
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, - ref_flags, GFP_KERNEL); + 0, GFP_KERNEL); if (ret) goto out;
@@@ -2036,104 -2028,57 +2036,104 @@@ out return ret; }
-struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup_do_get_tree(struct fs_context *fc) { - struct dentry *dentry; - bool new_sb; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + int ret;
- dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); + ctx->kfc.root = ctx->root->kf_root; + if (fc->fs_type == &cgroup2_fs_type) + ctx->kfc.magic = CGROUP2_SUPER_MAGIC; + else + ctx->kfc.magic = CGROUP_SUPER_MAGIC; + ret = kernfs_get_tree(fc);
/* * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ - if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { + if (!ret && ctx->ns != &init_cgroup_ns) { struct dentry *nsdentry; + struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp;
mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock);
- cgrp = cset_cgroup_from_root(ns->root_cset, root); + cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex);
- nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); - dput(dentry); - dentry = nsdentry; + nsdentry = kernfs_node_dentry(cgrp->kn, sb); + dput(fc->root); + fc->root = nsdentry; + if (IS_ERR(nsdentry)) { + ret = PTR_ERR(nsdentry); + deactivate_locked_super(sb); + } }
- if (IS_ERR(dentry) || !new_sb) - cgroup_put(&root->cgrp); + if (!ctx->kfc.new_sb_created) + cgroup_put(&ctx->root->cgrp);
- return dentry; + return ret; }
-static struct dentry *cgroup_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +/* + * Destroy a cgroup filesystem context. + */ +static void cgroup_fs_context_free(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct dentry *dentry; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + kfree(ctx->name); + kfree(ctx->release_agent); + put_cgroup_ns(ctx->ns); + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static int cgroup_get_tree(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret;
- get_cgroup_ns(ns); + cgrp_dfl_visible = true; + cgroup_get_live(&cgrp_dfl_root.cgrp); + ctx->root = &cgrp_dfl_root;
- /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { - put_cgroup_ns(ns); - return ERR_PTR(-EPERM); - } + ret = cgroup_do_get_tree(fc); + if (!ret) + apply_cgroup_root_flags(ctx->flags); + return ret; +} + +static const struct fs_context_operations cgroup_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_param = cgroup2_parse_param, + .get_tree = cgroup_get_tree, + .reconfigure = cgroup_reconfigure, +}; + +static const struct fs_context_operations cgroup1_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_param = cgroup1_parse_param, + .get_tree = cgroup1_get_tree, + .reconfigure = cgroup1_reconfigure, +}; + +/* + * Initialise the cgroup filesystem creation/reconfiguration context. Notably, + * we select the namespace we're going to use. + */ +static int cgroup_init_fs_context(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx; + + ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM;
/* * The first time anyone tries to mount a cgroup, enable the list @@@ -2142,18 -2087,29 +2142,18 @@@ if (!use_task_css_set_links) cgroup_enable_task_cg_lists();
- if (fs_type == &cgroup2_fs_type) { - unsigned int root_flags; - - ret = parse_cgroup_root_flags(data, &root_flags); - if (ret) { - put_cgroup_ns(ns); - return ERR_PTR(ret); - } - - cgrp_dfl_visible = true; - cgroup_get_live(&cgrp_dfl_root.cgrp); - - dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, - CGROUP2_SUPER_MAGIC, ns); - if (!IS_ERR(dentry)) - apply_cgroup_root_flags(root_flags); - } else { - dentry = cgroup1_mount(&cgroup_fs_type, flags, data, - CGROUP_SUPER_MAGIC, ns); - } - - put_cgroup_ns(ns); - return dentry; + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); + fc->fs_private = &ctx->kfc; + if (fc->fs_type == &cgroup2_fs_type) + fc->ops = &cgroup_fs_context_ops; + else + fc->ops = &cgroup1_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ctx->ns->user_ns); + fc->global = true; + return 0; }
static void cgroup_kill_sb(struct super_block *sb) @@@ -2162,33 -2118,33 +2162,33 @@@ struct cgroup_root *root = cgroup_root_from_kf(kf_root);
/* - * If @root doesn't have any mounts or children, start killing it. + * If @root doesn't have any children, start killing it. * This prevents new mounts by disabling percpu_ref_tryget_live(). * cgroup_mount() may wait for @root's release. * * And don't kill the default root. */ - if (!list_empty(&root->cgrp.self.children) || - root == &cgrp_dfl_root) - cgroup_put(&root->cgrp); - else + if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && + !percpu_ref_is_dying(&root->cgrp.self.refcnt)) percpu_ref_kill(&root->cgrp.self.refcnt); - + cgroup_put(&root->cgrp); kernfs_kill_sb(sb); }
struct file_system_type cgroup_fs_type = { - .name = "cgroup", - .mount = cgroup_mount, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup1_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, };
static struct file_system_type cgroup2_fs_type = { - .name = "cgroup2", - .mount = cgroup_mount, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup2", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup2_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, };
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, @@@ -5311,6 -5267,7 +5311,6 @@@ int cgroup_rmdir(struct kernfs_node *kn
static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .show_options = cgroup_show_options, - .remount_fs = cgroup_remount, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, @@@ -5377,12 -5334,11 +5377,12 @@@ static void __init cgroup_init_subsys(s */ int __init cgroup_init_early(void) { - static struct cgroup_sb_opts __initdata opts; + static struct cgroup_fs_context __initdata ctx; struct cgroup_subsys *ss; int i;
- init_cgroup_root(&cgrp_dfl_root, &opts); + ctx.root = &cgrp_dfl_root; + init_cgroup_root(&ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
RCU_INIT_POINTER(init_task.cgroups, &init_css_set); @@@ -5443,7 -5399,7 +5443,7 @@@ int __init cgroup_init(void hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys));
- BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); + BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
mutex_unlock(&cgroup_mutex);
@@@ -6040,7 -5996,7 +6040,7 @@@ int cgroup_bpf_detach(struct cgroup *cg int ret;
mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, type, flags); + ret = __cgroup_bpf_detach(cgrp, prog, type); mutex_unlock(&cgroup_mutex); return ret; } diff --combined net/batman-adv/bat_v_elp.c index ef0dec20c7d8,7b80f6f8d4dc..a9b7919c9de5 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@@ -1,5 -1,5 +1,5 @@@ // SPDX-License-Identifier: GPL-2.0 - /* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: + /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * @@@ -104,9 -104,6 +104,9 @@@ static u32 batadv_v_elp_get_throughput(
ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+ /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be diff --combined net/batman-adv/hard-interface.c index 415d494cbe22,28c1fb8d1af0..96ef7c70b4d9 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@@ -1,5 -1,5 +1,5 @@@ // SPDX-License-Identifier: GPL-2.0 - /* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: + /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@@ -20,6 -20,7 +20,6 @@@ #include "main.h"
#include <linux/atomic.h> -#include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/gfp.h> @@@ -178,10 -179,8 +178,10 @@@ static bool batadv_is_on_batman_iface(c parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + }
if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; diff --combined net/batman-adv/soft-interface.c index b85ca809e509,b14fb3462af7..93a5975c21a4 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@@ -1,5 -1,5 +1,5 @@@ // SPDX-License-Identifier: GPL-2.0 - /* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: + /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@@ -212,6 -212,7 +212,7 @@@ static netdev_tx_t batadv_interface_tx( enum batadv_forw_mode forw_mode; struct batadv_orig_node *mcast_single_orig = NULL; int network_offset = ETH_HLEN; + __be16 proto;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@@ -221,16 -222,17 +222,19 @@@
netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb);
- switch (ntohs(ethhdr->h_proto)) { + proto = ethhdr->h_proto; + + switch (ntohs(proto)) { case ETH_P_8021Q: vhdr = vlan_eth_hdr(skb); + proto = vhdr->h_vlan_encapsulated_proto;
/* drop batman-in-batman packets to prevent loops */ - if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) { + if (proto != htons(ETH_P_BATMAN)) { network_offset += VLAN_HLEN; break; } @@@ -258,6 -260,9 +262,9 @@@ goto dropped; }
+ /* Snoop address candidates from DHCPACKs for early DAT filling */ + batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid); + /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... * diff --combined net/core/filter.c index 7a54dc11ac2d,3a49f68eda10..b5a002d7b263 --- a/net/core/filter.c +++ b/net/core/filter.c @@@ -4112,12 -4112,10 +4112,12 @@@ BPF_CALL_5(bpf_setsockopt, struct bpf_s /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; @@@ -5316,10 -5314,20 +5316,20 @@@ bpf_base_func_proto(enum bpf_func_id fu return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* else, fall through */ + return bpf_get_trace_printk_proto(); default: return NULL; } @@@ -6710,6 -6718,27 +6720,27 @@@ static u32 bpf_convert_ctx_access(enum target_size)); break;
+ case offsetof(struct __sk_buff, gso_segs): + /* si->dst_reg = skb_shinfo(SKB); */ + #ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); + #else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); + #endif + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_segs, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
@@@ -7700,6 -7729,7 +7731,7 @@@ const struct bpf_verifier_ops flow_diss };
const struct bpf_prog_ops flow_dissector_prog_ops = { + .test_run = bpf_prog_test_run_flow_dissector, };
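As a usage sketch, the new gso_segs context field reads naturally from program text and is rewritten by the converter in bpf_convert_ctx_access() above; the policy below is invented for the example, and the usual BPF program headers (SEC(), plus TC_ACT_* from linux/pkt_cls.h) are assumed:

    SEC("classifier")
    int seg_limit(struct __sk_buff *skb)
    {
        /* hypothetical policy: drop anything segmented into > 8 pieces */
        if (skb->gso_segs > 8)
            return TC_ACT_SHOT;
        return TC_ACT_OK;
    }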
int sk_detach_filter(struct sock *sk) diff --combined net/core/skmsg.c index 8c826603bf36,e76ed8df9f13..ae6f06e45737 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@@ -78,11 -78,9 +78,9 @@@ int sk_msg_clone(struct sock *sk, struc { int i = src->sg.start; struct scatterlist *sge = sk_msg_elem(src, i); + struct scatterlist *sgd = NULL; u32 sge_len, sge_off;
- if (sk_msg_full(dst)) - return -ENOSPC; - while (off) { if (sge->length > off) break; @@@ -94,16 -92,27 +92,27 @@@ }
while (len) { - if (sk_msg_full(dst)) - return -ENOSPC; - sge_len = sge->length - off; - sge_off = sge->offset + off; if (sge_len > len) sge_len = len; + + if (dst->sg.end) + sgd = sk_msg_elem(dst, dst->sg.end - 1); + + if (sgd && + (sg_page(sge) == sg_page(sgd)) && + (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) { + sgd->length += sge_len; + dst->sg.size += sge_len; + } else if (!sk_msg_full(dst)) { + sge_off = sge->offset + off; + sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); + } else { + return -ENOSPC; + } + off = 0; len -= sge_len; - sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); sk_mem_charge(sk, sge_len); sk_msg_iter_var_next(i); if (i == src->sg.end && len) @@@ -545,7 -554,8 +554,7 @@@ static void sk_psock_destroy_deferred(s struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
/* No sk_callback_lock since already detached. */ - if (psock->parser.enabled) - strp_done(&psock->parser.strp); + strp_done(&psock->parser.strp);
cancel_work_sync(&psock->work);
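The sk_msg_clone() rework above stops failing early with -ENOSPC: it first tries to extend the destination's tail element when the new chunk is physically contiguous with it, and only gives up when the chunk neither merges nor fits in a fresh slot. A userspace sketch of that append-or-coalesce decision, using plain pointers in place of scatterlist pages:

    #include <stdbool.h>
    #include <stddef.h>

    /* toy scatterlist entry: a buffer pointer plus a length */
    struct frag { char *addr; size_t len; };

    /* Append [addr, addr + len) to dst, merging into the tail entry
     * when the ranges are contiguous. */
    static bool frag_append(struct frag *dst, size_t *n, size_t cap,
                            char *addr, size_t len)
    {
        if (*n && dst[*n - 1].addr + dst[*n - 1].len == addr) {
            dst[*n - 1].len += len;  /* contiguous: grow the tail */
            return true;
        }
        if (*n == cap)
            return false;            /* mirrors the -ENOSPC path */
        dst[*n].addr = addr;
        dst[*n].len = len;
        (*n)++;
        return true;
    }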
diff --combined net/ipv4/ip_gre.c index 3978f807fa8b,d1cef66820d3..ccee9411dae1 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@@ -449,81 -449,14 +449,14 @@@ static int gre_handle_offloads(struct s return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); }
- static struct rtable *gre_get_rt(struct sk_buff *skb, - struct net_device *dev, - struct flowi4 *fl, - const struct ip_tunnel_key *key) - { - struct net *net = dev_net(dev); - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = skb->mark; - fl->flowi4_proto = IPPROTO_GRE; - - return ip_route_output_key(net, fl); - } - - static struct rtable *prepare_fb_xmit(struct sk_buff *skb, - struct net_device *dev, - struct flowi4 *fl, - int tunnel_hlen) - { - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - struct rtable *rt = NULL; - int min_headroom; - bool use_cache; - int err; - - tun_info = skb_tunnel_info(skb); - key = &tun_info->key; - use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); - - if (use_cache) - rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); - if (!rt) { - rt = gre_get_rt(skb, dev, fl, key); - if (IS_ERR(rt)) - goto err_free_skb; - if (use_cache) - dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, - fl->saddr); - } - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + tunnel_hlen + sizeof(struct iphdr); - if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { - int head_delta = SKB_DATA_ALIGN(min_headroom - - skb_headroom(skb) + - 16); - err = pskb_expand_head(skb, max_t(int, head_delta, 0), - 0, GFP_ATOMIC); - if (unlikely(err)) - goto err_free_rt; - } - return rt; - - err_free_rt: - ip_rt_put(rt); - err_free_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return NULL; - } - static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - struct rtable *rt = NULL; - struct flowi4 fl; int tunnel_hlen; - __be16 df, flags; + __be16 flags;
tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@@ -533,13 -466,12 +466,12 @@@ key = &tun_info->key; tunnel_hlen = gre_calc_hlen(key->tun_flags);
- rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); - if (!rt) - return; + if (skb_cow_head(skb, dev->needed_headroom)) + goto err_free_skb;
/* Push Tunnel header. */ if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) - goto err_free_rt; + goto err_free_skb;
flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); @@@ -547,14 -479,10 +479,10 @@@ tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
- iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); return;
- err_free_rt: - ip_rt_put(rt); err_free_skb: kfree_skb(skb); dev->stats.tx_dropped++; @@@ -566,10 -494,8 +494,8 @@@ static void erspan_fb_xmit(struct sk_bu struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; - struct rtable *rt = NULL; bool truncate = false; - __be16 df, proto; - struct flowi4 fl; + __be16 proto; int tunnel_hlen; int version; int nhoff; @@@ -582,21 -508,20 +508,20 @@@
key = &tun_info->key; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) - goto err_free_rt; + goto err_free_skb; md = ip_tunnel_info_opts(tun_info); if (!md) - goto err_free_rt; + goto err_free_skb;
/* ERSPAN has fixed 8 byte GRE header */ version = md->version; tunnel_hlen = 8 + erspan_hdr_len(version);
- rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); - if (!rt) - return; + if (skb_cow_head(skb, dev->needed_headroom)) + goto err_free_skb;
if (gre_handle_offloads(skb, false)) - goto err_free_rt; + goto err_free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) { pskb_trim(skb, dev->mtu + dev->hard_header_len); @@@ -625,20 -550,16 +550,16 @@@ truncate, true); proto = htons(ETH_P_ERSPAN2); } else { - goto err_free_rt; + goto err_free_skb; }
gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(tunnel->o_seqno++));
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
- iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); return;
- err_free_rt: - ip_rt_put(rt); err_free_skb: kfree_skb(skb); dev->stats.tx_dropped++; @@@ -647,13 -568,18 +568,18 @@@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); + const struct ip_tunnel_key *key; struct rtable *rt; struct flowi4 fl4;
if (ip_tunnel_info_af(info) != AF_INET) return -EINVAL;
- rt = gre_get_rt(skb, dev, &fl4, &info->key); + key = &info->key; + ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, + tunnel_id_to_key32(key->tun_id), key->tos, 0, + skb->mark); + rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt);
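gre_fill_metadata_dst() above now builds its route key via ip_tunnel_init_flow() instead of the removed gre_get_rt() helper. The essential behaviour is that the key is zeroed and then populated from the tunnel metadata, so no stale field leaks into the route lookup. A sketch with an illustrative flow-key struct (not the kernel's struct flowi4):

    #include <stdint.h>
    #include <string.h>

    struct flow_key {
        uint32_t daddr, saddr, mark;
        uint8_t tos, proto;
    };

    static void tunnel_flow_init(struct flow_key *fl, uint8_t proto,
                                 uint32_t dst, uint32_t src,
                                 uint8_t tos, uint32_t mark)
    {
        memset(fl, 0, sizeof(*fl)); /* unset fields must not be stale */
        fl->proto = proto;
        fl->daddr = dst;
        fl->saddr = src;
        fl->tos = tos;
        fl->mark = mark;
    }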
@@@ -1455,17 -1381,12 +1381,17 @@@ static int ipgre_fill_info(struct sk_bu { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --combined net/ipv6/ip6_gre.c index 801a9a0c217e,e081e69d534e..65a4f96dc462 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@@ -524,7 -524,7 +524,7 @@@ static int ip6gre_rcv(struct sk_buff *s return PACKET_REJECT; }
- static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, + static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { struct erspan_base_hdr *ershdr; @@@ -607,7 -607,7 +607,7 @@@ static int gre_rcv(struct sk_buff *skb
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) return 0; goto out; } @@@ -2098,17 -2098,12 +2098,17 @@@ static int ip6gre_fill_info(struct sk_b { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --combined net/mac80211/tx.c index 928f13a208b0,61c7ea9de2cc..8a49a74c0a37 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@@ -1449,6 -1449,7 +1449,7 @@@ void ieee80211_txq_init(struct ieee8021 codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); __skb_queue_head_init(&txqi->frags); + INIT_LIST_HEAD(&txqi->schedule_order);
txqi->txq.vif = &sdata->vif;
@@@ -1487,8 -1488,14 +1488,14 @@@ void ieee80211_txq_purge(struct ieee802 struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin;
+ spin_lock_bh(&fq->lock); fq_tin_reset(fq, tin, fq_skb_free_func); ieee80211_purge_tx_queue(&local->hw, &txqi->frags); + spin_unlock_bh(&fq->lock); + + spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]); + list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]); }
void ieee80211_txq_set_params(struct ieee80211_local *local) @@@ -1605,7 -1612,7 +1612,7 @@@ static bool ieee80211_queue_skb(struct ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock);
- drv_wake_tx_queue(local, txqi); + schedule_and_wake_txq(local, txqi);
return true; } @@@ -1938,16 -1945,9 +1945,16 @@@ static int ieee80211_skb_resize(struct int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0;
- if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@@ -1955,7 -1955,8 +1962,7 @@@
if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@@ -3636,6 -3637,151 +3643,151 @@@ out } EXPORT_SYMBOL(ieee80211_tx_dequeue);
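The block of additions just below introduces per-AC TXQ scheduling with airtime fairness: each station carries an airtime deficit, and ieee80211_next_txq() recharges and requeues a station whose deficit has gone negative instead of serving it. A self-contained userspace model of that deficit round-robin; the names, weights and starting balances are made up:

    #include <stdio.h>

    struct station { const char *name; int deficit; int weight; };

    /* Serve stations in order; one with a negative balance is
     * recharged by its weight and skipped this round. */
    static const struct station *next_station(struct station *sta,
                                              int n, int *cursor)
    {
        for (int tries = 0; tries < 2 * n; tries++) {
            struct station *s = &sta[*cursor % n];

            (*cursor)++;
            if (s->deficit < 0) {
                s->deficit += s->weight; /* recharge and move on */
                continue;
            }
            return s;
        }
        return NULL;
    }

    int main(void)
    {
        struct station sta[] = {
            { "slow", -300, 256 },
            { "fast",   10, 256 },
        };
        int cursor = 0;
        const struct station *s = next_station(sta, 2, &cursor);

        if (s)
            printf("serving %s\n", s->name); /* "fast" goes first */
        return 0;
    }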
+ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) + { + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = NULL; + + lockdep_assert_held(&local->active_txq_lock[ac]); + + begin: + txqi = list_first_entry_or_null(&local->active_txqs[ac], + struct txq_info, + schedule_order); + if (!txqi) + return NULL; + + if (txqi->txq.sta) { + struct sta_info *sta = container_of(txqi->txq.sta, + struct sta_info, sta); + + if (sta->airtime[txqi->txq.ac].deficit < 0) { + sta->airtime[txqi->txq.ac].deficit += + sta->airtime_weight; + list_move_tail(&txqi->schedule_order, + &local->active_txqs[txqi->txq.ac]); + goto begin; + } + } + + + if (txqi->schedule_round == local->schedule_round[ac]) + return NULL; + + list_del_init(&txqi->schedule_order); + txqi->schedule_round = local->schedule_round[ac]; + return &txqi->txq; + } + EXPORT_SYMBOL(ieee80211_next_txq); + + void ieee80211_return_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) + { + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = to_txq_info(txq); + + lockdep_assert_held(&local->active_txq_lock[txq->ac]); + + if (list_empty(&txqi->schedule_order) && + (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + /* If airtime accounting is active, always enqueue STAs at the + * head of the list to ensure that they only get moved to the + * back by the airtime DRR scheduler once they have a negative + * deficit. A station that already has a negative deficit will + * get immediately moved to the back of the list on the next + * call to ieee80211_next_txq(). + */ + if (txqi->txq.sta && + wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + list_add(&txqi->schedule_order, + &local->active_txqs[txq->ac]); + else + list_add_tail(&txqi->schedule_order, + &local->active_txqs[txq->ac]); + } + } + EXPORT_SYMBOL(ieee80211_return_txq); + + void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) + __acquires(txq_lock) __releases(txq_lock) + { + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->active_txq_lock[txq->ac]); + ieee80211_return_txq(hw, txq); + spin_unlock_bh(&local->active_txq_lock[txq->ac]); + } + EXPORT_SYMBOL(ieee80211_schedule_txq); + + bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) + { + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *iter, *tmp, *txqi = to_txq_info(txq); + struct sta_info *sta; + u8 ac = txq->ac; + + lockdep_assert_held(&local->active_txq_lock[ac]); + + if (!txqi->txq.sta) + goto out; + + if (list_empty(&txqi->schedule_order)) + goto out; + + list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac], + schedule_order) { + if (iter == txqi) + break; + + if (!iter->txq.sta) { + list_move_tail(&iter->schedule_order, + &local->active_txqs[ac]); + continue; + } + sta = container_of(iter->txq.sta, struct sta_info, sta); + if (sta->airtime[ac].deficit < 0) + sta->airtime[ac].deficit += sta->airtime_weight; + list_move_tail(&iter->schedule_order, &local->active_txqs[ac]); + } + + sta = container_of(txqi->txq.sta, struct sta_info, sta); + if (sta->airtime[ac].deficit >= 0) + goto out; + + sta->airtime[ac].deficit += sta->airtime_weight; + list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + + return false; + out: + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + + return true; + } + EXPORT_SYMBOL(ieee80211_txq_may_transmit); + + void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
+ __acquires(txq_lock) + { + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->active_txq_lock[ac]); + local->schedule_round[ac]++; + } + EXPORT_SYMBOL(ieee80211_txq_schedule_start); + + void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) + __releases(txq_lock) + { + struct ieee80211_local *local = hw_to_local(hw); + + spin_unlock_bh(&local->active_txq_lock[ac]); + } + EXPORT_SYMBOL(ieee80211_txq_schedule_end); + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) diff --combined net/netfilter/nf_conntrack_core.c index db4d46332e86,815956ac5a76..08ee03407ace --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@@ -222,6 -222,24 +222,24 @@@ static u32 hash_conntrack(const struct return scale_hash(hash_conntrack_raw(tuple, net)); }
+ static bool nf_ct_get_tuple_ports(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) + { + struct { + __be16 sport; + __be16 dport; + } _inet_hdr, *inet_hdr; + + /* Actually only need first 4 bytes to get ports. */ + inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); + if (!inet_hdr) + return false; + + tuple->src.u.udp.port = inet_hdr->sport; + tuple->dst.u.udp.port = inet_hdr->dport; + return true; + } + static bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, @@@ -229,16 -247,11 +247,11 @@@ u_int16_t l3num, u_int8_t protonum, struct net *net, - struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto) + struct nf_conntrack_tuple *tuple) { unsigned int size; const __be32 *ap; __be32 _addrs[8]; - struct { - __be16 sport; - __be16 dport; - } _inet_hdr, *inet_hdr;
memset(tuple, 0, sizeof(*tuple));
@@@ -274,16 -287,36 +287,36 @@@ tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- if (unlikely(l4proto->pkt_to_tuple)) - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); - - /* Actually only need first 4 bytes to get ports. */ - inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); - if (!inet_hdr) - return false; + switch (protonum) { + #if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); + #endif + case IPPROTO_ICMP: + return icmp_pkt_to_tuple(skb, dataoff, net, tuple); + #ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return gre_pkt_to_tuple(skb, dataoff, net, tuple); + #endif + case IPPROTO_TCP: + case IPPROTO_UDP: /* fallthrough */ + return nf_ct_get_tuple_ports(skb, dataoff, tuple); + #ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); + #endif + #ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); + #endif + #ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); + #endif + default: + break; + }
- tuple->src.u.udp.port = inet_hdr->sport; - tuple->dst.u.udp.port = inet_hdr->dport; return true; }
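nf_ct_get_tuple() above replaces the ->pkt_to_tuple indirection with a direct switch, and for TCP, UDP, UDP-lite, SCTP and DCCP a single helper reads the port pair from the first four bytes of the transport header. A userspace equivalent of that bounds-checked read, where the length check stands in for skb_header_pointer() failing on a truncated packet:

    #include <arpa/inet.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    static bool get_ports(const uint8_t *pkt, size_t len, size_t dataoff,
                          uint16_t *sport, uint16_t *dport)
    {
        uint16_t hdr[2];

        if (dataoff + sizeof(hdr) > len)  /* short packet: no ports */
            return false;
        memcpy(hdr, pkt + dataoff, sizeof(hdr));
        *sport = ntohs(hdr[0]);
        *dport = ntohs(hdr[1]);
        return true;
    }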
@@@ -366,33 -399,20 +399,20 @@@ bool nf_ct_get_tuplepr(const struct sk_ u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct nf_conntrack_l4proto *l4proto; u8 protonum; int protoff; - int ret; - - rcu_read_lock();
protoff = get_l4proto(skb, nhoff, l3num, &protonum); - if (protoff <= 0) { - rcu_read_unlock(); + if (protoff <= 0) return false; - } - - l4proto = __nf_ct_l4proto_find(protonum);
- ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, - l4proto); - - rcu_read_unlock(); - return ret; + return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_tuple *orig) { memset(inverse, 0, sizeof(*inverse));
@@@ -415,8 -435,14 +435,14 @@@
inverse->dst.protonum = orig->dst.protonum;
- if (unlikely(l4proto->invert_tuple)) - return l4proto->invert_tuple(inverse, orig); + switch (orig->dst.protonum) { + case IPPROTO_ICMP: + return nf_conntrack_invert_icmp_tuple(inverse, orig); + #if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return nf_conntrack_invert_icmpv6_tuple(inverse, orig); + #endif + }
inverse->src.u.all = orig->dst.u.all; inverse->dst.u.all = orig->src.u.all; @@@ -526,11 -552,20 +552,20 @@@ void nf_ct_tmpl_free(struct nf_conn *tm } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
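nf_ct_invert_tuple() above likewise drops its ->invert_tuple callback parameter: inverting is just swapping the endpoints, with only ICMP/ICMPv6 needing extra type/code handling, now chosen by an explicit switch. A toy model of the generic part:

    #include <stdint.h>
    #include <string.h>

    struct endpoint { uint32_t addr; uint16_t port; };
    struct tuple { struct endpoint src, dst; uint8_t proto; };

    /* The reply direction simply swaps source and destination. */
    static void invert_tuple(struct tuple *inv, const struct tuple *orig)
    {
        memset(inv, 0, sizeof(*inv));
        inv->proto = orig->proto;
        inv->src = orig->dst;
        inv->dst = orig->src;
    }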
+ static void destroy_gre_conntrack(struct nf_conn *ct) + { + #ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_conn *master = ct->master; + + if (master) + nf_ct_gre_keymap_destroy(master); + #endif + } + static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct); WARN_ON(atomic_read(&nfct->use) != 0); @@@ -539,9 -574,9 +574,9 @@@ nf_ct_tmpl_free(ct); return; } - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); - if (l4proto->destroy) - l4proto->destroy(ct); + + if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) + destroy_gre_conntrack(ct);
local_bh_disable(); /* Expectations will have been removed in clean_from_lists, @@@ -840,7 -875,7 +875,7 @@@ static int nf_ct_resolve_clash(struct n enum ip_conntrack_info oldinfo; struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->allow_clash && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { @@@ -1007,22 -1042,6 +1042,22 @@@ nf_conntrack_tuple_taken(const struct n }
if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; @@@ -1128,7 -1147,7 +1163,7 @@@ static bool gc_worker_can_early_drop(co if (!test_bit(IPS_ASSURED_BIT, &ct->status)) return true;
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) return true;
@@@ -1358,7 -1377,6 +1393,6 @@@ EXPORT_SYMBOL_GPL(nf_conntrack_free) static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { @@@ -1371,7 -1389,7 +1405,7 @@@ struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { + if (!nf_ct_invert_tuple(&repl_tuple, tuple)) { pr_debug("Can't invert tuple.\n"); return NULL; } @@@ -1453,7 -1471,6 +1487,6 @@@ resolve_normal_ct(struct nf_conn *tmpl struct sk_buff *skb, unsigned int dataoff, u_int8_t protonum, - const struct nf_conntrack_l4proto *l4proto, const struct nf_hook_state *state) { const struct nf_conntrack_zone *zone; @@@ -1466,7 -1483,7 +1499,7 @@@
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, state->pf, protonum, state->net, - &tuple, l4proto)) { + &tuple)) { pr_debug("Can't get tuple\n"); return 0; } @@@ -1476,7 -1493,7 +1509,7 @@@ hash = hash_conntrack_raw(&tuple, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, hash); if (!h) { - h = init_conntrack(state->net, tmpl, &tuple, l4proto, + h = init_conntrack(state->net, tmpl, &tuple, skb, dataoff, hash); if (!h) return 0; @@@ -1538,10 -1555,66 +1571,66 @@@ nf_conntrack_handle_icmp(struct nf_con return ret; }
+ static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, + enum ip_conntrack_info ctinfo) + { + const unsigned int *timeout = nf_ct_timeout_lookup(ct); + + if (!timeout) + timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; + + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); + return NF_ACCEPT; + } + + /* Returns verdict for packet, or -1 for invalid. */ + static int nf_conntrack_handle_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) + { + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + return nf_conntrack_tcp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_UDP: + return nf_conntrack_udp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_ICMP: + return nf_conntrack_icmp_packet(ct, skb, ctinfo, state); + #if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); + #endif + #ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_conntrack_udplite_packet(ct, skb, dataoff, + ctinfo, state); + #endif + #ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_conntrack_sctp_packet(ct, skb, dataoff, + ctinfo, state); + #endif + #ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_conntrack_dccp_packet(ct, skb, dataoff, + ctinfo, state); + #endif + #ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return nf_conntrack_gre_packet(ct, skb, dataoff, + ctinfo, state); + #endif + } + + return generic_packet(ct, skb, ctinfo); + } + unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { - const struct nf_conntrack_l4proto *l4proto; enum ip_conntrack_info ctinfo; struct nf_conn *ct, *tmpl; u_int8_t protonum; @@@ -1568,8 -1641,6 +1657,6 @@@ goto out; }
- l4proto = __nf_ct_l4proto_find(protonum); - if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) { ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff, protonum, state); @@@ -1583,7 -1654,7 +1670,7 @@@ } repeat: ret = resolve_normal_ct(tmpl, skb, dataoff, - protonum, l4proto, state); + protonum, state); if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(state->net, drop); @@@ -1599,7 -1670,7 +1686,7 @@@ goto out; }
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, state); + ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ @@@ -1630,19 -1701,6 +1717,6 @@@ out } EXPORT_SYMBOL_GPL(nf_conntrack_in);
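nf_conntrack_in() now dispatches through nf_conntrack_handle_packet(), a plain switch over the protocol number, where it previously made an indirect call through l4proto->packet(). Presumably the motivation is the usual one for this series: direct calls are cheaper than indirect ones on retpoline kernels and can be inlined. A toy model of the dispatch shape (protocol numbers and handlers are illustrative):

    #include <stdint.h>

    static int tcp_packet(void)  { return 1; }
    static int udp_packet(void)  { return 1; }
    static int generic_pkt(void) { return 1; }

    /* direct dispatch: no function-pointer table in sight */
    static int handle_packet(uint8_t protonum)
    {
        switch (protonum) {
        case 6:  return tcp_packet();  /* IPPROTO_TCP */
        case 17: return udp_packet();  /* IPPROTO_UDP */
        default: return generic_pkt(); /* catch-all timeout handling */
        }
    }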
- bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig) - { - bool ret; - - rcu_read_lock(); - ret = nf_ct_invert_tuple(inverse, orig, - __nf_ct_l4proto_find(orig->dst.protonum)); - rcu_read_unlock(); - return ret; - } - EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); - /* Alter reply tuple (maybe alter helper). This is for NAT, and is implicitly racy: see __nf_conntrack_confirm */ void nf_conntrack_alter_reply(struct nf_conn *ct, @@@ -1773,7 -1831,6 +1847,6 @@@ static void nf_conntrack_attach(struct
static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { - const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; @@@ -1794,10 -1851,8 +1867,8 @@@ if (dataoff <= 0) return -1;
- l4proto = nf_ct_l4proto_find_get(l4num); - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, - l4num, net, &tuple, l4proto)) + l4num, net, &tuple)) return -1;
if (ct->status & IPS_SRC_NAT) { @@@ -2429,15 -2484,10 +2500,10 @@@ int nf_conntrack_init_net(struct net *n nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); nf_conntrack_helper_pernet_init(net); + nf_conntrack_proto_pernet_init(net);
- ret = nf_conntrack_proto_pernet_init(net); - if (ret < 0) - goto err_proto; return 0;
- err_proto: - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); err_expect: free_percpu(net->ct.stat); err_pcpu_lists: diff --combined net/netfilter/nf_tables_api.c index 5a92f23f179f,e92bedd09cde..5ca5ec8f3cf0 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -37,10 -37,16 +37,16 @@@ enum NFT_VALIDATE_DO, };
+ static struct rhltable nft_objname_ht; + static u32 nft_chain_hash(const void *data, u32 len, u32 seed); static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed); static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
+ static u32 nft_objname_hash(const void *data, u32 len, u32 seed); + static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed); + static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *); + static const struct rhashtable_params nft_chain_ht_params = { .head_offset = offsetof(struct nft_chain, rhlhead), .key_offset = offsetof(struct nft_chain, name), @@@ -51,6 -57,15 +57,15 @@@ .automatic_shrinking = true, };
+ static const struct rhashtable_params nft_objname_ht_params = { + .head_offset = offsetof(struct nft_object, rhlhead), + .key_offset = offsetof(struct nft_object, key), + .hashfn = nft_objname_hash, + .obj_hashfn = nft_objname_hash_obj, + .obj_cmpfn = nft_objname_hash_cmp, + .automatic_shrinking = true, + }; + static void nft_validate_state_update(struct net *net, u8 new_validate_state) { switch (net->nft.validate_state) { @@@ -116,23 -131,6 +131,23 @@@ static void nft_trans_destroy(struct nf kfree(trans); }
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_set_is_anonymous(set)) + return; + + list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + nft_trans_set(trans) == set) { + nft_trans_set_bound(trans) = true; + break; + } + } +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@@ -228,6 -226,18 +243,6 @@@ static int nft_delchain(struct nft_ctx return err; }
-/* either expr ops provide both activate/deactivate, or neither */ -static bool nft_expr_check_ops(const struct nft_expr_ops *ops) -{ - if (!ops) - return true; - - if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) - return false; - - return true; -} - static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@@ -243,15 -253,14 +258,15 @@@ }
static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule) + struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr;
expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { if (expr->ops->deactivate) - expr->ops->deactivate(ctx, expr); + expr->ops->deactivate(ctx, expr, phase);
expr = nft_expr_next(expr); } @@@ -302,7 -311,7 +317,7 @@@ static int nft_delrule(struct nft_ctx * nft_trans_destroy(trans); return err; } - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
return 0; } @@@ -820,6 -829,34 +835,34 @@@ static int nft_chain_hash_cmp(struct rh return strcmp(chain->name, name); }
+ static u32 nft_objname_hash(const void *data, u32 len, u32 seed) + { + const struct nft_object_hash_key *k = data; + + seed ^= hash_ptr(k->table, 32); + + return jhash(k->name, strlen(k->name), seed); + } + + static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed) + { + const struct nft_object *obj = data; + + return nft_objname_hash(&obj->key, 0, seed); + } + + static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) + { + const struct nft_object_hash_key *k = arg->key; + const struct nft_object *obj = ptr; + + if (obj->key.table != k->table) + return -1; + + return strcmp(obj->key.name, k->name); + } + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@@ -1076,7 -1113,7 +1119,7 @@@ nft_chain_lookup_byhandle(const struct return ERR_PTR(-ENOENT); }
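nft_objname_hash() and its comparator above key the new object table on the (table, name) pair: the table pointer is folded into the hash seed, the name is hashed as a string, and the comparator must reject entries whose table differs before comparing names. A userspace sketch of the same two-part key, with a trivial string hash standing in for jhash:

    #include <stdint.h>
    #include <string.h>

    struct obj_key { const void *table; const char *name; };

    static uint32_t hash_key(const struct obj_key *k, uint32_t seed)
    {
        uint32_t h = seed ^ (uint32_t)(uintptr_t)k->table;

        for (const char *p = k->name; *p; p++)
            h = h * 31 + (uint8_t)*p;  /* stand-in for jhash */
        return h;
    }

    static int key_cmp(const struct obj_key *a, const struct obj_key *b)
    {
        if (a->table != b->table)
            return -1;  /* same name in a different table: no match */
        return strcmp(a->name, b->name);
    }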
- static bool lockdep_commit_lock_is_held(struct net *net) + static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING return lockdep_is_held(&net->nft.commit_mutex); @@@ -1935,6 -1972,9 +1978,6 @@@ static int nf_tables_delchain(struct ne */ int nft_register_expr(struct nft_expr_type *type) { - if (!nft_expr_check_ops(type->ops)) - return -EINVAL; - nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@@ -2082,6 -2122,10 +2125,6 @@@ static int nf_tables_expr_parse(const s err = PTR_ERR(ops); goto err1; } - if (!nft_expr_check_ops(ops)) { - err = -EINVAL; - goto err1; - } } else ops = type->ops;
@@@ -2510,7 -2554,7 +2553,7 @@@ static void nf_tables_rule_destroy(cons static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); }
@@@ -2564,6 -2608,9 +2607,9 @@@ static int nft_table_validate(struct ne return 0; }
+ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla); + #define NFT_RULE_MAXEXPRS 128
static int nf_tables_newrule(struct net *net, struct sock *nlsk, @@@ -2633,6 -2680,12 +2679,12 @@@ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); } + } else if (nla[NFTA_RULE_POSITION_ID]) { + old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]); + if (IS_ERR(old_rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); + return PTR_ERR(old_rule); + } } }
@@@ -3707,30 -3760,39 +3759,30 @@@ int nf_tables_bind_set(const struct nft bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); + return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set);
-void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) -{ - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) - list_add_tail_rcu(&set->list, &ctx->table->sets); - - list_add_tail_rcu(&binding->list, &set->bindings); -} -EXPORT_SYMBOL_GPL(nf_tables_rebind_set); - void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); + } } EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) { - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(set); - } } EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
@@@ -3841,7 -3903,7 +3893,7 @@@ static int nf_tables_fill_setelem(struc
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nla_put_string(skb, NFTA_SET_ELEM_OBJREF, - (*nft_set_ext_obj(ext))->name) < 0) + (*nft_set_ext_obj(ext))->key.name) < 0) goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && @@@ -4374,7 -4436,8 +4426,8 @@@ static int nft_add_set_elem(struct nft_ err = -EINVAL; goto err2; } - obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + obj = nft_obj_lookup(ctx->net, ctx->table, + nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@@ -4809,18 -4872,36 +4862,36 @@@ void nft_unregister_obj(struct nft_obje } EXPORT_SYMBOL_GPL(nft_unregister_obj);
- struct nft_object *nft_obj_lookup(const struct nft_table *table, + struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask) { + struct nft_object_hash_key k = { .table = table }; + char search[NFT_OBJ_MAXNAMELEN]; + struct rhlist_head *tmp, *list; struct nft_object *obj;
- list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nla_strcmp(nla, obj->name) && - objtype == obj->ops->type->type && - nft_active_genmask(obj, genmask)) + nla_strlcpy(search, nla, sizeof(search)); + k.name = search; + + WARN_ON_ONCE(!rcu_read_lock_held() && + !lockdep_commit_lock_is_held(net)); + + rcu_read_lock(); + list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params); + if (!list) + goto out; + + rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) { + if (objtype == obj->ops->type->type && + nft_active_genmask(obj, genmask)) { + rcu_read_unlock(); return obj; + } } + out: + rcu_read_unlock(); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(nft_obj_lookup); @@@ -4978,7 -5059,7 +5049,7 @@@ static int nf_tables_newobj(struct net }
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err != -ENOENT) { @@@ -5004,11 -5085,11 +5075,11 @@@ err = PTR_ERR(obj); goto err1; } - obj->table = table; + obj->key.table = table; obj->handle = nf_tables_alloc_handle(table);
- obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); - if (!obj->name) { + obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->key.name) { err = -ENOMEM; goto err2; } @@@ -5017,11 -5098,20 +5088,20 @@@ if (err < 0) goto err3;
+ err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, + nft_objname_ht_params); + if (err < 0) + goto err4; + list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; + err4: + /* queued in transaction log */ + INIT_LIST_HEAD(&obj->list); + return err; err3: - kfree(obj->name); + kfree(obj->key.name); err2: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); @@@ -5050,7 -5140,7 +5130,7 @@@ static int nf_tables_fill_obj_info(stru nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || - nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || @@@ -5205,7 -5295,7 +5285,7 @@@ static int nf_tables_getobj(struct net }
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return PTR_ERR(obj); @@@ -5236,7 -5326,7 +5316,7 @@@ static void nft_obj_destroy(const struc obj->ops->destroy(ctx, obj);
module_put(obj->ops->type->owner); - kfree(obj->name); + kfree(obj->key.name); kfree(obj); }
@@@ -5270,7 -5360,7 +5350,7 @@@ static int nf_tables_delobj(struct net obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask); } else { attr = nla[NFTA_OBJ_NAME]; - obj = nft_obj_lookup(table, attr, objtype, genmask); + obj = nft_obj_lookup(net, table, attr, objtype, genmask); }
if (IS_ERR(obj)) { @@@ -5287,7 -5377,7 +5367,7 @@@ return nft_delobj(&ctx, obj); }
- void nft_obj_notify(struct net *net, struct nft_table *table, + void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { @@@ -6394,6 -6484,12 +6474,12 @@@ static void nf_tables_commit_chain(stru nf_tables_commit_chain_free_rules_old(g0); }
+ static void nft_obj_del(struct nft_object *obj) + { + rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); + list_del_rcu(&obj->list); + } + static void nft_chain_del(struct nft_chain *chain) { struct nft_table *table = chain->table; @@@ -6525,9 -6621,6 +6611,9 @@@ static int nf_tables_commit(struct net nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_DELRULE); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@@ -6573,7 -6666,7 +6659,7 @@@ nft_trans_destroy(trans); break; case NFT_MSG_DELOBJ: - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), NFT_MSG_DELOBJ); break; @@@ -6614,8 -6707,7 +6700,8 @@@ static void nf_tables_abort_release(str nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + if (!nft_trans_set_bound(trans)) + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@@ -6676,9 -6768,7 +6762,9 @@@ static int __nf_tables_abort(struct ne case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; @@@ -6688,8 -6778,7 +6774,8 @@@ break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del_rcu(&nft_trans_set(trans)->list); + if (!nft_trans_set_bound(trans)) + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; @@@ -6713,7 -6802,7 +6799,7 @@@ break; case NFT_MSG_NEWOBJ: trans->ctx.table->use--; - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); break; case NFT_MSG_DELOBJ: trans->ctx.table->use++; @@@ -7327,7 -7416,7 +7413,7 @@@ static void __nft_release_tables(struc nft_set_destroy(set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { - list_del(&obj->list); + nft_obj_del(obj); table->use--; nft_obj_destroy(&ctx, obj); } @@@ -7389,12 -7478,18 +7475,18 @@@ static int __init nf_tables_module_init if (err < 0) goto err3;
+ err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params); + if (err < 0) + goto err4; + /* must be last */ err = nfnetlink_subsys_register(&nf_tables_subsys); if (err < 0) - goto err4; + goto err5;
return err; + err5: + rhltable_destroy(&nft_objname_ht); err4: unregister_netdevice_notifier(&nf_tables_flowtable_notifier); err3: @@@ -7414,6 -7509,7 +7506,7 @@@ static void __exit nf_tables_module_exi unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); + rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); }
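The module init/exit hunks above slot rhltable_init()/rhltable_destroy() into the usual layered-unwind idiom: each facility brought up before the failure point is torn down again, in reverse order, through cascading error labels. The shape, as a compilable sketch with stub facilities:

    #include <stdbool.h>

    static bool init_a(void) { return true; }
    static bool init_b(void) { return true; }
    static bool init_c(void) { return true; }
    static void exit_a(void) { }
    static void exit_b(void) { }

    static int module_init_model(void)
    {
        if (!init_a())
            return -1;
        if (!init_b())
            goto err_a;
        if (!init_c())
            goto err_b;
        return 0;       /* everything up */

    err_b:              /* unwind in reverse order of setup */
        exit_b();
    err_a:
        exit_a();
        return -1;
    }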
diff --combined net/netfilter/nft_dynset.c index f1172f99752b,9658493d37d4..a8a74a16f9c4 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@@ -62,9 -62,8 +62,8 @@@ err1 return NULL; }
- static void nft_dynset_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) + void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; @@@ -235,17 -234,20 +234,17 @@@ err1 return err; }
-static void nft_dynset_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_dynset *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_dynset_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); }
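The deactivate callback now carries a transaction phase, and nft_dynset keeps its set bound during NFT_TRANS_PREPARE so that an aborted transaction can still restore the rule; the binding is only dropped on abort, commit or release, with a notification when the set really goes away on commit. A toy model of that phase-aware teardown (names shortened, semantics approximated):

    enum trans_phase { TRANS_PREPARE, TRANS_ABORT, TRANS_COMMIT,
                       TRANS_RELEASE };

    struct binding { int bound; };

    static void unbind(struct binding *b, int notify)
    {
        b->bound = 0;
        (void)notify;  /* a delete event would be emitted here */
    }

    static void deactivate(struct binding *b, enum trans_phase phase)
    {
        if (phase == TRANS_PREPARE)
            return;    /* the rule may come back if we abort */
        unbind(b, phase == TRANS_COMMIT);
    }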
static void nft_dynset_destroy(const struct nft_ctx *ctx, @@@ -293,6 -295,7 +292,6 @@@ static const struct nft_expr_ops nft_dy .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, - .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --combined net/netfilter/nft_immediate.c index 3f6d1d2a6281,3e5ed787b1d4..5ec43124cbca --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@@ -17,9 -17,9 +17,9 @@@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h>
- static void nft_immediate_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) + void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_immediate_expr *priv = nft_expr_priv(expr);
@@@ -72,14 -72,10 +72,14 @@@ static void nft_immediate_activate(cons }
static void nft_immediate_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ if (phase == NFT_TRANS_COMMIT) + return; + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); }
diff --combined net/netfilter/nft_objref.c index ae178e914486,c1f2adf198a0..79ef074c18ca --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@@ -38,7 -38,8 +38,8 @@@ static int nft_objref_init(const struc return -EINVAL;
objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); - obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, + obj = nft_obj_lookup(ctx->net, ctx->table, + tb[NFTA_OBJREF_IMM_NAME], objtype, genmask); if (IS_ERR(obj)) return -ENOENT; @@@ -53,7 -54,7 +54,7 @@@ static int nft_objref_dump(struct sk_bu { const struct nft_object *obj = nft_objref_priv(expr);
- if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || + if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->ops->type->type))) goto nla_put_failure; @@@ -155,17 -156,20 +156,17 @@@ nla_put_failure return -1; }
-static void nft_objref_map_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_objref_map *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_objref_map_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); }
static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@@ -182,6 -186,7 +183,6 @@@ static const struct nft_expr_ops nft_ob .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, - .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, diff --combined net/sctp/socket.c index 65d6d04546ae,9644bdc8e85c..a78e55a1bb9c --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -248,7 -248,7 +248,7 @@@ struct sctp_association *sctp_id2assoc( }
/* Otherwise this is a UDP-style socket. */ - if (!id || (id == (sctp_assoc_t)-1)) + if (id <= SCTP_ALL_ASSOC) return NULL;
spin_lock_bh(&sctp_assocs_id_lock); @@@ -2027,7 -2027,7 +2027,7 @@@ static int sctp_sendmsg(struct sock *sk struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@@ -2053,7 -2053,7 +2053,7 @@@
/* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) @@@ -2750,12 -2750,13 +2750,13 @@@ static int sctp_setsockopt_peer_addr_pa return -EINVAL; }
- /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Heartbeat demand can only be sent on a transport or @@@ -2797,6 -2798,43 +2798,43 @@@ static inline __u32 sctp_spp_sackdelay_ return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; }
+ static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, + struct sctp_association *asoc) + { + struct sctp_transport *trans; + + if (params->sack_delay) { + asoc->sackdelay = msecs_to_jiffies(params->sack_delay); + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + if (params->sack_freq == 1) { + asoc->param_flags = + sctp_spp_sackdelay_disable(asoc->param_flags); + } else if (params->sack_freq > 1) { + asoc->sackfreq = params->sack_freq; + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (params->sack_delay) { + trans->sackdelay = msecs_to_jiffies(params->sack_delay); + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + if (params->sack_freq == 1) { + trans->param_flags = + sctp_spp_sackdelay_disable(trans->param_flags); + } else if (params->sack_freq > 1) { + trans->sackfreq = params->sack_freq; + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + } + } + /* * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) * @@@ -2836,10 -2874,9 +2874,9 @@@ static int sctp_setsockopt_delayed_ack(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_sack_info params; - struct sctp_transport *trans = NULL; - struct sctp_association *asoc = NULL; - struct sctp_sock *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sack_info params;
if (optlen == sizeof(struct sctp_sack_info)) { if (copy_from_user(¶ms, optval, optlen)) @@@ -2867,67 -2904,42 +2904,42 @@@ if (params.sack_delay > 500) return -EINVAL;
- /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- if (params.sack_delay) { - if (asoc) { - asoc->sackdelay = - msecs_to_jiffies(params.sack_delay); - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + if (asoc) { + sctp_apply_asoc_delayed_ack(¶ms, asoc); + + return 0; + } + + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) { + if (params.sack_delay) { sp->sackdelay = params.sack_delay; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } - } - - if (params.sack_freq == 1) { - if (asoc) { - asoc->param_flags = - sctp_spp_sackdelay_disable(asoc->param_flags); - } else { + if (params.sack_freq == 1) { sp->param_flags = sctp_spp_sackdelay_disable(sp->param_flags); - } - } else if (params.sack_freq > 1) { - if (asoc) { - asoc->sackfreq = params.sack_freq; - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + } else if (params.sack_freq > 1) { sp->sackfreq = params.sack_freq; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } }
- /* If change is for association, also apply to each transport. */ - if (asoc) { - list_for_each_entry(trans, &asoc->peer.transport_addr_list, - transports) { - if (params.sack_delay) { - trans->sackdelay = - msecs_to_jiffies(params.sack_delay); - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - if (params.sack_freq == 1) { - trans->param_flags = - sctp_spp_sackdelay_disable(trans->param_flags); - } else if (params.sack_freq > 1) { - trans->sackfreq = params.sack_freq; - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - } - } + if (params.sack_assoc_id == SCTP_CURRENT_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + sctp_apply_asoc_delayed_ack(¶ms, asoc);
return 0; } @@@ -2997,15 -3009,22 +3009,22 @@@ static int sctp_setsockopt_default_send return -EINVAL;
asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.sinfo_stream; asoc->default_flags = info.sinfo_flags; asoc->default_ppid = info.sinfo_ppid; asoc->default_context = info.sinfo_context; asoc->default_timetolive = info.sinfo_timetolive; - } else { + + return 0; + } + + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; sp->default_flags = info.sinfo_flags; sp->default_ppid = info.sinfo_ppid; @@@ -3013,6 -3032,17 +3032,17 @@@ sp->default_timetolive = info.sinfo_timetolive; }
+ if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.sinfo_stream; + asoc->default_flags = info.sinfo_flags; + asoc->default_ppid = info.sinfo_ppid; + asoc->default_context = info.sinfo_context; + asoc->default_timetolive = info.sinfo_timetolive; + } + } + return 0; }
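The SCTP setsockopt conversions in this file all follow one pattern built on the reserved association ids: an explicit association wins; otherwise SCTP_FUTURE_ASSOC updates the socket defaults that later associations inherit, SCTP_CURRENT_ASSOC walks the existing ones, and SCTP_ALL_ASSOC does both. A compact userspace model of that fan-out (the 0/1/2 encoding is how the enum reads here, but treat it as illustrative):

    enum { ASSOC_FUTURE, ASSOC_CURRENT, ASSOC_ALL };

    struct assoc { int sackfreq; struct assoc *next; };
    struct sock_state { int sackfreq; struct assoc *assocs; };

    static void set_sackfreq(struct sock_state *sp, int assoc_id, int freq)
    {
        /* FUTURE (and ALL): new associations inherit this default */
        if (assoc_id == ASSOC_FUTURE || assoc_id == ASSOC_ALL)
            sp->sackfreq = freq;

        /* CURRENT (and ALL): apply to every live association now */
        if (assoc_id == ASSOC_CURRENT || assoc_id == ASSOC_ALL)
            for (struct assoc *a = sp->assocs; a; a = a->next)
                a->sackfreq = freq;
    }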
@@@ -3037,20 -3067,37 +3067,37 @@@ static int sctp_setsockopt_default_sndi return -EINVAL;
asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.snd_sid; asoc->default_flags = info.snd_flags; asoc->default_ppid = info.snd_ppid; asoc->default_context = info.snd_context; - } else { + + return 0; + } + + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; sp->default_flags = info.snd_flags; sp->default_ppid = info.snd_ppid; sp->default_context = info.snd_context; }
+ if (info.snd_assoc_id == SCTP_CURRENT_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.snd_sid; + asoc->default_flags = info.snd_flags; + asoc->default_ppid = info.snd_ppid; + asoc->default_context = info.snd_context; + } + } + return 0; }
@@@ -3144,7 -3191,8 +3191,8 @@@ static int sctp_setsockopt_rtoinfo(stru asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
/* Set the values to the specific association */ - if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
rto_max = rtoinfo.srto_max; @@@ -3206,7 -3254,8 +3254,8 @@@ static int sctp_setsockopt_associnfo(st
asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
- if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Set the values to the specific association */ @@@ -3319,7 -3368,7 +3368,7 @@@ static int sctp_setsockopt_maxseg(struc current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@@ -3329,6 -3378,9 +3378,9 @@@ }
asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
if (val) { int min_len, max_len; @@@ -3346,8 -3398,6 +3398,6 @@@ asoc->user_frag = val; sctp_assoc_update_frag_point(asoc); } else { - if (params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; sp->user_frag = val; }
@@@ -3460,8 -3510,8 +3510,8 @@@ static int sctp_setsockopt_adaptation_l static int sctp_setsockopt_context(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc;
if (optlen != sizeof(struct sctp_assoc_value)) @@@ -3469,17 -3519,26 +3519,26 @@@ if (copy_from_user(¶ms, optval, optlen)) return -EFAULT;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; + if (asoc) { asoc->default_rcv_context = params.assoc_value; - } else { - sp->default_rcv_context = params.assoc_value; + + return 0; }
+ if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_rcv_context = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->default_rcv_context = params.assoc_value; + return 0; }
@@@ -3580,11 -3639,9 +3639,9 @@@ static int sctp_setsockopt_maxburst(str char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; - int val; - int assoc_id = 0;
if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED @@@ -3592,25 -3649,34 +3649,34 @@@ "Use of int in max_burst socket option deprecated.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(&val, optval, optlen)) + if (copy_from_user(¶ms.assoc_value, optval, optlen)) return -EFAULT; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - val = params.assoc_value; - assoc_id = params.assoc_id; } else return -EINVAL;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (assoc_id != 0) { - asoc = sctp_id2assoc(sk, assoc_id); - if (!asoc) - return -EINVAL; - asoc->max_burst = val; - } else - sp->max_burst = val; + if (asoc) { + asoc->max_burst = params.assoc_value; + + return 0; + } + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->max_burst = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->max_burst = params.assoc_value;
return 0; } @@@ -3702,7 -3768,7 +3768,7 @@@ static int sctp_setsockopt_auth_key(str struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; - int ret; + int ret = -EINVAL;
if (!ep->auth_enable) return -EACCES; @@@ -3712,25 -3778,44 +3778,44 @@@ /* authkey->sca_keylength is u16, so optlen can't be bigger than * this. */ - optlen = min_t(unsigned int, optlen, USHRT_MAX + - sizeof(struct sctp_authkey)); + optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey));
authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey);
- if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) { - ret = -EINVAL; + if (authkey->sca_keylength > optlen - sizeof(*authkey)) goto out; - }
asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); - if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { - ret = -EINVAL; + if (!asoc && authkey->sca_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + if (asoc) { + ret = sctp_auth_set_key(ep, asoc, authkey); goto out; }
- ret = sctp_auth_set_key(ep, asoc, authkey); + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_key(ep, asoc, authkey); + if (ret) + goto out; + } + + ret = 0; + + if (authkey->sca_assoc_id == SCTP_CURRENT_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_key(ep, asoc, authkey); + + if (res && !ret) + ret = res; + } + } + out: kzfree(authkey); return ret; @@@ -3747,8 -3832,9 +3832,9 @@@ static int sctp_setsockopt_active_key(s unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0;
if (!ep->auth_enable) return -EACCES; @@@ -3759,10 -3845,32 +3845,32 @@@ return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_active_key(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; }
/* @@@ -3775,8 -3883,9 +3883,9 @@@ static int sctp_setsockopt_del_key(stru unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0;
if (!ep->auth_enable) return -EACCES; @@@ -3787,11 -3896,32 +3896,32 @@@ return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_del_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + }
+ return ret; }
/* @@@ -3803,8 -3933,9 +3933,9 @@@ static int sctp_setsockopt_deactivate_k unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0;
if (!ep->auth_enable) return -EACCES; @@@ -3815,10 -3946,32 +3946,32 @@@ return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_deact_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; }
/* @@@ -3884,11 -4037,25 +4037,25 @@@ static int sctp_setsockopt_paddr_thresh sizeof(struct sctp_paddrthlds))) return -EFAULT;
- - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) return -ENOENT; + + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { if (val.spt_pathmaxrxt) @@@ -3900,14 -4067,11 +4067,11 @@@ asoc->pathmaxrxt = val.spt_pathmaxrxt; asoc->pf_retrans = val.spt_pathpfthld; } else { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); - if (!trans) - return -ENOENT; + struct sctp_sock *sp = sctp_sk(sk);
if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; - trans->pf_retrans = val.spt_pathpfthld; + sp->pathmaxrxt = val.spt_pathmaxrxt; + sp->pf_retrans = val.spt_pathpfthld; }
return 0; @@@ -3950,6 -4114,7 +4114,7 @@@ static int sctp_setsockopt_pr_supported unsigned int optlen) { struct sctp_assoc_value params; + struct sctp_association *asoc;
if (optlen != sizeof(params)) return -EINVAL; @@@ -3957,6 -4122,11 +4122,11 @@@ if (copy_from_user(¶ms, optval, optlen)) return -EFAULT;
+ asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
return 0; @@@ -3966,6 -4136,7 +4136,7 @@@ static int sctp_setsockopt_default_prin char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_default_prinfo info; struct sctp_association *asoc; int retval = -EINVAL; @@@ -3985,19 -4156,31 +4156,31 @@@ info.pr_value = 0;
asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); asoc->default_timetolive = info.pr_value; - } else if (!info.pr_assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); + goto out; + }
+ if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); sp->default_timetolive = info.pr_value; - } else { - goto out; }
- retval = 0; + if (info.pr_assoc_id == SCTP_CURRENT_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); + asoc->default_timetolive = info.pr_value; + } + }
out: return retval; @@@ -4020,15 -4203,14 +4203,14 @@@ static int sctp_setsockopt_reconfig_sup }
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - asoc->reconf_enable = !!params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->reconf_enable = !!params.assoc_value; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out; - } + + if (asoc) + asoc->reconf_enable = !!params.assoc_value; + else + sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value;
retval = 0;
@@@ -4040,6 -4222,7 +4222,7 @@@ static int sctp_setsockopt_enable_strre char __user *optval, unsigned int optlen) { + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; @@@ -4056,17 -4239,25 +4239,25 @@@ goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { asoc->strreset_enable = params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->strreset_enable = params.assoc_value; - } else { goto out; }
- retval = 0; + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + ep->strreset_enable = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &ep->asocs, asocs) + asoc->strreset_enable = params.assoc_value;
out: return retval; @@@ -4161,29 -4352,44 +4352,44 @@@ static int sctp_setsockopt_scheduler(st char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_assoc_value params; - int retval = -EINVAL; + int retval = 0;
if (optlen < sizeof(params)) - goto out; + return -EINVAL;
optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT;
if (params.assoc_value > SCTP_SS_MAX) - goto out; + return -EINVAL;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - goto out; + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- retval = sctp_sched_set_sched(asoc, params.assoc_value); + if (asoc) + return sctp_sched_set_sched(asoc, params.assoc_value); + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_ss = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_sched_set_sched(asoc, + params.assoc_value); + + if (ret && !retval) + retval = ret; + } + }
- out: return retval; }
@@@ -4191,8 -4397,8 +4397,8 @@@ static int sctp_setsockopt_scheduler_va char __user *optval, unsigned int optlen) { - struct sctp_association *asoc; struct sctp_stream_value params; + struct sctp_association *asoc; int retval = -EINVAL;
if (optlen < sizeof(params)) @@@ -4205,11 -4411,24 +4411,24 @@@ }
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) + if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC && + sctp_style(sk, UDP)) goto out;
- retval = sctp_sched_set_value(asoc, params.stream_id, - params.stream_value, GFP_KERNEL); + if (asoc) { + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + goto out; + } + + retval = 0; + + list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) { + int ret = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + if (ret && !retval) /* try to return the 1st error. */ + retval = ret; + }
out: return retval; @@@ -4220,8 -4439,8 +4439,8 @@@ static int sctp_setsockopt_interleaving unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct net *net = sock_net(sk); struct sctp_assoc_value params; + struct sctp_association *asoc; int retval = -EINVAL;
if (optlen < sizeof(params)) @@@ -4233,10 -4452,12 +4452,12 @@@ goto out; }
- if (params.assoc_id) + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out;
- if (!net->sctp.intl_enable || !sp->frag_interleave) { + if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) { retval = -EPERM; goto out; } @@@ -4271,54 -4492,69 +4492,69 @@@ static int sctp_setsockopt_reuse_port(s return 0; }
+ static int sctp_assoc_ulpevent_type_set(struct sctp_event *param, + struct sctp_association *asoc) + { + struct sctp_ulpevent *event; + + sctp_ulpevent_type_set(&asoc->subscribe, param->se_type, param->se_on); + + if (param->se_type == SCTP_SENDER_DRY_EVENT && param->se_on) { + if (sctp_outq_is_empty(&asoc->outqueue)) { + event = sctp_ulpevent_make_sender_dry_event(asoc, + GFP_USER | __GFP_NOWARN); + if (!event) + return -ENOMEM; + + asoc->stream.si->enqueue_event(&asoc->ulpq, event); + } + } + + return 0; + } + static int sctp_setsockopt_event(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_ulpevent *event; struct sctp_event param; int retval = 0;
- if (optlen < sizeof(param)) { - retval = -EINVAL; - goto out; - } + if (optlen < sizeof(param)) + return -EINVAL;
optlen = sizeof(param); - if (copy_from_user(¶m, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶m, optval, optlen)) + return -EFAULT;
if (param.se_type < SCTP_SN_TYPE_BASE || - param.se_type > SCTP_SN_TYPE_MAX) { - retval = -EINVAL; - goto out; - } + param.se_type > SCTP_SN_TYPE_MAX) + return -EINVAL;
asoc = sctp_id2assoc(sk, param.se_assoc_id); - if (!asoc) { - sctp_ulpevent_type_set(&sctp_sk(sk)->subscribe, - param.se_type, param.se_on); - goto out; - } + if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- sctp_ulpevent_type_set(&asoc->subscribe, param.se_type, param.se_on); + if (asoc) + return sctp_assoc_ulpevent_type_set(¶m, asoc);
- if (param.se_type == SCTP_SENDER_DRY_EVENT && param.se_on) { - if (sctp_outq_is_empty(&asoc->outqueue)) { - event = sctp_ulpevent_make_sender_dry_event(asoc, - GFP_USER | __GFP_NOWARN); - if (!event) { - retval = -ENOMEM; - goto out; - } + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) + sctp_ulpevent_type_set(&sp->subscribe, + param.se_type, param.se_on);
- asoc->stream.si->enqueue_event(&asoc->ulpq, event); + if (param.se_assoc_id == SCTP_CURRENT_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_assoc_ulpevent_type_set(¶m, asoc); + + if (ret && !retval) + retval = ret; } }
- out: return retval; }
@@@ -4777,12 -5013,14 +5013,14 @@@ static int sctp_init_sock(struct sock * */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; + sp->pf_retrans = net->sctp.pf_retrans; sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; + sp->default_ss = SCTP_SS_DEFAULT;
/* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. @@@ -5676,12 -5914,13 +5914,13 @@@ static int sctp_getsockopt_peer_addr_pa } }
- /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@@ -5810,19 -6049,19 +6049,19 @@@ static int sctp_getsockopt_delayed_ack( } else return -EINVAL;
- /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
if (asoc) { /* Fetch association values. */ if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { - params.sack_delay = jiffies_to_msecs( - asoc->sackdelay); + params.sack_delay = jiffies_to_msecs(asoc->sackdelay); params.sack_freq = asoc->sackfreq;
} else { @@@ -6175,8 -6414,10 +6414,10 @@@ static int sctp_getsockopt_default_send return -EFAULT;
asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; @@@ -6219,8 -6460,10 +6460,10 @@@ static int sctp_getsockopt_default_sndi return -EFAULT;
asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.snd_sid = asoc->default_stream; info.snd_flags = asoc->default_flags; @@@ -6296,7 -6539,8 +6539,8 @@@ static int sctp_getsockopt_rtoinfo(stru
asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
- if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Values corresponding to the specific association. */ @@@ -6353,7 -6597,8 +6597,8 @@@ static int sctp_getsockopt_associnfo(st
asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
- if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Values correspoinding to the specific association */ @@@ -6428,7 -6673,6 +6673,6 @@@ static int sctp_getsockopt_context(stru char __user *optval, int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc;
if (len < sizeof(struct sctp_assoc_value)) @@@ -6439,16 -6683,13 +6683,13 @@@ if (copy_from_user(¶ms, optval, len)) return -EFAULT;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->default_rcv_context; - } else { - params.assoc_value = sp->default_rcv_context; - } + params.assoc_value = asoc ? asoc->default_rcv_context + : sctp_sk(sk)->default_rcv_context;
if (put_user(len, optlen)) return -EFAULT; @@@ -6497,7 -6738,7 +6738,7 @@@ static int sctp_getsockopt_maxseg(struc "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@@ -6506,7 -6747,8 +6747,8 @@@ return -EINVAL;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
if (asoc) @@@ -6583,7 -6825,6 +6825,6 @@@ static int sctp_getsockopt_maxburst(str int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc;
if (len == sizeof(int)) { @@@ -6592,7 -6833,7 +6833,7 @@@ "Use of int in max_burst socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@@ -6600,15 -6841,12 +6841,12 @@@ } else return -EINVAL;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->max_burst; - } else - params.assoc_value = sp->max_burst; + params.assoc_value = asoc ? asoc->max_burst : sctp_sk(sk)->max_burst;
if (len == sizeof(int)) { if (copy_to_user(optval, ¶ms.assoc_value, len)) @@@ -6759,14 -6997,12 +6997,12 @@@ static int sctp_getsockopt_local_auth_c
to = p->gauth_chunks; asoc = sctp_id2assoc(sk, val.gauth_assoc_id); - if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.gauth_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- if (asoc) - ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; - else - ch = ep->auth_chunk_list; - + ch = asoc ? (struct sctp_chunks_param *)asoc->c.auth_chunks + : ep->auth_chunk_list; if (!ch) goto num;
@@@ -6911,14 -7147,7 +7147,7 @@@ static int sctp_getsockopt_paddr_thresh if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) return -EFAULT;
- if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) - return -ENOENT; - - val.spt_pathpfthld = asoc->pf_retrans; - val.spt_pathmaxrxt = asoc->pathmaxrxt; - } else { + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { trans = sctp_addr_id2transport(sk, &val.spt_address, val.spt_assoc_id); if (!trans) @@@ -6926,6 -7155,23 +7155,23 @@@
val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + val.spt_pathpfthld = asoc->pf_retrans; + val.spt_pathmaxrxt = asoc->pathmaxrxt; + } else { + struct sctp_sock *sp = sctp_sk(sk); + + val.spt_pathpfthld = sp->pf_retrans; + val.spt_pathmaxrxt = sp->pathmaxrxt; }
if (put_user(len, optlen) || copy_to_user(optval, &val, len)) @@@ -7056,17 -7302,15 +7302,15 @@@ static int sctp_getsockopt_pr_supported goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->prsctp_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->prsctp_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->prsctp_enable + : sctp_sk(sk)->ep->prsctp_enable; + if (put_user(len, optlen)) goto out;
@@@ -7097,17 -7341,20 +7341,20 @@@ static int sctp_getsockopt_default_prin goto out;
asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + if (asoc) { info.pr_policy = SCTP_PR_POLICY(asoc->default_flags); info.pr_value = asoc->default_timetolive; - } else if (!info.pr_assoc_id) { + } else { struct sctp_sock *sp = sctp_sk(sk);
info.pr_policy = SCTP_PR_POLICY(sp->default_flags); info.pr_value = sp->default_timetolive; - } else { - retval = -EINVAL; - goto out; }
if (put_user(len, optlen)) @@@ -7263,17 -7510,15 +7510,15 @@@ static int sctp_getsockopt_reconfig_sup goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->reconf_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->reconf_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->reconf_enable + : sctp_sk(sk)->ep->reconf_enable; + if (put_user(len, optlen)) goto out;
@@@ -7304,17 -7549,15 +7549,15 @@@ static int sctp_getsockopt_enable_strre goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->strreset_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->strreset_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->strreset_enable + : sctp_sk(sk)->ep->strreset_enable; + if (put_user(len, optlen)) goto out;
@@@ -7345,12 -7588,14 +7588,14 @@@ static int sctp_getsockopt_scheduler(st goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
- params.assoc_value = sctp_sched_get_sched(asoc); + params.assoc_value = asoc ? sctp_sched_get_sched(asoc) + : sctp_sk(sk)->default_ss;
if (put_user(len, optlen)) goto out; @@@ -7424,17 -7669,15 +7669,15 @@@ static int sctp_getsockopt_interleaving goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->intl_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->strm_interleave; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->intl_enable + : sctp_sk(sk)->strm_interleave; + if (put_user(len, optlen)) goto out;
@@@ -7486,6 -7729,10 +7729,10 @@@ static int sctp_getsockopt_event(struc return -EINVAL;
asoc = sctp_id2assoc(sk, param.se_assoc_id); + if (!asoc && param.se_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe; param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type);
diff --combined net/smc/af_smc.c index b04a813fc865,369870b0ef79..fb9c8711962a --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@@ -291,7 -291,8 +291,8 @@@ static void smc_copy_sock_settings(stru (1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_WIFI_STATUS) | \ (1UL << SOCK_NOFCS) | \ - (1UL << SOCK_FILTER_LOCKED)) + (1UL << SOCK_FILTER_LOCKED) | \ + (1UL << SOCK_TSTAMP_NEW)) /* copy only relevant settings and flags of SOL_SOCKET level from smc to * clc socket (since smc is not called for these options from net/core) */ @@@ -1505,11 -1506,6 +1506,11 @@@ static int smc_recvmsg(struct socket *s
smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@@ -1845,11 -1841,7 +1846,11 @@@ static ssize_t smc_splice_read(struct s
smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) diff --combined net/socket.c index d80d87a395ea,d51930689b98..643a1648fcc2 --- a/net/socket.c +++ b/net/socket.c @@@ -669,7 -669,7 +669,7 @@@ static bool skb_is_err_queue(const stru * before the software timestamp is received, a hardware TX timestamp may be * returned only if there is no software TX timestamp. Ignore false software * timestamps, which may be made in the __sock_recv_timestamp() call when the - * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a + * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a * hardware timestamp. */ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) @@@ -705,7 -705,9 +705,9 @@@ void __sock_recv_timestamp(struct msghd struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct scm_timestamping tss; + int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); + struct scm_timestamping_internal tss; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@@ -719,34 -721,54 +721,54 @@@
if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; - skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, - sizeof(tv), &tv); + if (new_tstamp) { + struct __kernel_sock_timeval tv; + + skb_get_new_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(tv), &tv); + } else { + struct __kernel_old_timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts), &ts); + if (new_tstamp) { + struct __kernel_timespec ts; + + skb_get_new_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + sizeof(ts), &ts); + } else { + struct timespec ts; + + skb_get_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, + sizeof(ts), &ts); + } } }
memset(&tss, 0, sizeof(tss)); if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb); } if (!empty) { - put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(tss), &tss); + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, &tss); + else + put_cmsg_scm_timestamping(msg, &tss);
if (skb_is_err_queue(skb) && skb->len && SKB_EXT_ERR(skb)->opt_stats) @@@ -941,7 -963,8 +963,7 @@@ void dlci_ioctl_set(int (*hook) (unsign EXPORT_SYMBOL(dlci_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@@ -967,11 -990,11 +989,11 @@@ } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@@ -1070,7 -1093,8 +1092,7 @@@ static long sock_ioctl(struct file *fil err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@@ -2778,7 -2802,8 +2800,7 @@@ static int do_siocgstamp(struct net *ne int err;
set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@@ -2794,7 -2819,8 +2816,7 @@@ static int do_siocgstampns(struct net * int err;
set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@@ -2990,54 -3016,6 +3012,54 @@@ static int compat_ifr_data_ioctl(struc return dev_ioctl(net, cmd, &ifreq, NULL); }
+static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCGIFNAME: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@@ -3153,7 -3131,8 +3175,7 @@@ static int routing_ioctl(struct net *ne }
set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs);
out: @@@ -3253,22 -3232,21 +3275,22 @@@ static int compat_sock_ioctl_trans(stru case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSARP: - case SIOCGARP: - case SIOCDARP: - case SIOCATMARK: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return compat_ifreq_ioctl(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: + return sock_do_ioctl(net, sock, cmd, arg); }
return -ENOIOCTLCMD; diff --combined tools/bpf/bpftool/map.c index 1ef1ee2280a2,2160a8ef17e5..e0c650d91784 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@@ -21,7 -21,7 +21,7 @@@ #include "json_writer.h" #include "main.h"
- static const char * const map_type_name[] = { + const char * const map_type_name[] = { [BPF_MAP_TYPE_UNSPEC] = "unspec", [BPF_MAP_TYPE_HASH] = "hash", [BPF_MAP_TYPE_ARRAY] = "array", @@@ -48,6 -48,8 +48,8 @@@ [BPF_MAP_TYPE_STACK] = "stack", };
+ const size_t map_type_name_size = ARRAY_SIZE(map_type_name); + static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@@ -285,16 -287,21 +287,21 @@@ static void print_entry_plain(struct bp single_line = info->key_size + info->value_size <= 24 && !break_names;
- printf("key:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, key, info->key_size, " "); + if (info->key_size) { + printf("key:%c", break_names ? '\n' : ' '); + fprint_hex(stdout, key, info->key_size, " ");
- printf(single_line ? " " : "\n"); + printf(single_line ? " " : "\n"); + }
- printf("value:%c", break_names ? '\n' : ' '); - if (value) - fprint_hex(stdout, value, info->value_size, " "); - else - printf("<no entry>"); + if (info->value_size) { + printf("value:%c", break_names ? '\n' : ' '); + if (value) + fprint_hex(stdout, value, info->value_size, + " "); + else + printf("<no entry>"); + }
printf("\n"); } else { @@@ -303,19 -310,23 +310,23 @@@ n = get_possible_cpus(); step = round_up(info->value_size, 8);
- printf("key:\n"); - fprint_hex(stdout, key, info->key_size, " "); - printf("\n"); - for (i = 0; i < n; i++) { - printf("value (CPU %02d):%c", - i, info->value_size > 16 ? '\n' : ' '); - if (value) - fprint_hex(stdout, value + i * step, - info->value_size, " "); - else - printf("<no entry>"); + if (info->key_size) { + printf("key:\n"); + fprint_hex(stdout, key, info->key_size, " "); printf("\n"); } + if (info->value_size) { + for (i = 0; i < n; i++) { + printf("value (CPU %02d):%c", + i, info->value_size > 16 ? '\n' : ' '); + if (value) + fprint_hex(stdout, value + i * step, + info->value_size, " "); + else + printf("<no entry>"); + printf("\n"); + } + } } }
@@@ -347,20 -358,6 +358,20 @@@ static char **parse_bytes(char **argv, return argv + i; }
+/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); +} + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@@ -429,6 -426,9 +440,9 @@@ p_err("not enough value arguments for map of progs"); return -1; } + if (is_prefix(*argv, "id")) + p_info("Warning: updating program array via MAP_ID, make sure this map is kept open\n" + " by some process or pinned otherwise update will be lost");
fd = prog_parse_fd(&argc, &argv); if (fd < 0) @@@ -440,8 -440,6 +454,8 @@@ argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); }
return parse_elem(argv, info, key, NULL, key_size, value_size, @@@ -513,9 -511,10 +527,9 @@@ static int show_map_close_json(int fd, jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); } - if (atoi(owner_jited)) - jsonw_bool_field(json_wtr, "owner_jited", true); - else - jsonw_bool_field(json_wtr, "owner_jited", false); + if (owner_jited) + jsonw_bool_field(json_wtr, "owner_jited", + !!atoi(owner_jited));
free(owner_prog_type); free(owner_jited); @@@ -568,8 -567,7 +582,8 @@@ static int show_map_close_plain(int fd char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); char *owner_jited = get_fdinfo(fd, "owner_jited");
- printf("\n\t"); + if (owner_prog_type || owner_jited) + printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type);
@@@ -579,9 -577,10 +593,9 @@@ else printf("owner_prog_type %d ", prog_type); } - if (atoi(owner_jited)) - printf("owner jited"); - else - printf("owner not jited"); + if (owner_jited) + printf("owner%s jited", + atoi(owner_jited) ? "" : " not");
free(owner_prog_type); free(owner_jited); @@@ -794,6 -793,32 +808,32 @@@ exit_free return err; }
+ static int alloc_key_value(struct bpf_map_info *info, void **key, void **value) + { + *key = NULL; + *value = NULL; + + if (info->key_size) { + *key = malloc(info->key_size); + if (!*key) { + p_err("key mem alloc failed"); + return -1; + } + } + + if (info->value_size) { + *value = alloc_value(info); + if (!*value) { + p_err("value mem alloc failed"); + free(*key); + *key = NULL; + return -1; + } + } + + return 0; + } + static int do_update(int argc, char **argv) { struct bpf_map_info info = {}; @@@ -810,13 -835,9 +850,9 @@@ if (fd < 0) return -1;
- key = malloc(info.key_size); - value = alloc_value(&info); - if (!key || !value) { - p_err("mem alloc failed"); - err = -1; + err = alloc_key_value(&info, &key, &value); + if (err) goto exit_free; - }
err = parse_elem(argv, &info, key, value, info.key_size, info.value_size, &flags, &value_fd); @@@ -841,12 -862,51 +877,51 @@@ exit_free return err; }
+ static void print_key_value(struct bpf_map_info *info, void *key, + void *value) + { + json_writer_t *btf_wtr; + struct btf *btf = NULL; + int err; + + err = btf__get_from_id(info->btf_id, &btf); + if (err) { + p_err("failed to get btf"); + return; + } + + if (json_output) { + print_entry_json(info, key, value, btf); + } else if (btf) { + /* if here json_wtr wouldn't have been initialised, + * so let's create separate writer for btf + */ + btf_wtr = get_btf_writer(); + if (!btf_wtr) { + p_info("failed to create json writer for btf. falling back to plain output"); + btf__free(btf); + btf = NULL; + print_entry_plain(info, key, value); + } else { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + do_dump_btf(&d, info, key, value); + jsonw_destroy(&btf_wtr); + } + } else { + print_entry_plain(info, key, value); + } + btf__free(btf); + } + static int do_lookup(int argc, char **argv) { struct bpf_map_info info = {}; __u32 len = sizeof(info); - json_writer_t *btf_wtr; - struct btf *btf = NULL; void *key, *value; int err; int fd; @@@ -858,13 -918,9 +933,9 @@@ if (fd < 0) return -1;
- key = malloc(info.key_size); - value = alloc_value(&info); - if (!key || !value) { - p_err("mem alloc failed"); - err = -1; + err = alloc_key_value(&info, &key, &value); + if (err) goto exit_free; - }
err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); if (err) @@@ -888,43 -944,12 +959,12 @@@ }
/* here means bpf_map_lookup_elem() succeeded */ - err = btf__get_from_id(info.btf_id, &btf); - if (err) { - p_err("failed to get btf"); - goto exit_free; - } - - if (json_output) { - print_entry_json(&info, key, value, btf); - } else if (btf) { - /* if here json_wtr wouldn't have been initialised, - * so let's create separate writer for btf - */ - btf_wtr = get_btf_writer(); - if (!btf_wtr) { - p_info("failed to create json writer for btf. falling back to plain output"); - btf__free(btf); - btf = NULL; - print_entry_plain(&info, key, value); - } else { - struct btf_dumper d = { - .btf = btf, - .jw = btf_wtr, - .is_plain_text = true, - }; - - do_dump_btf(&d, &info, key, value); - jsonw_destroy(&btf_wtr); - } - } else { - print_entry_plain(&info, key, value); - } + print_key_value(&info, key, value);
exit_free: free(key); free(value); close(fd); - btf__free(btf);
return err; } @@@ -1137,6 -1162,49 +1177,49 @@@ static int do_create(int argc, char **a return 0; }
+ static int do_pop_dequeue(int argc, char **argv) + { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *value; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + err = alloc_key_value(&info, &key, &value); + if (err) + goto exit_free; + + err = bpf_map_lookup_and_delete_elem(fd, key, value); + if (err) { + if (errno == ENOENT) { + if (json_output) + jsonw_null(json_wtr); + else + printf("Error: empty map\n"); + } else { + p_err("pop failed: %s", strerror(errno)); + } + + goto exit_free; + } + + print_key_value(&info, key, value); + + exit_free: + free(key); + free(value); + close(fd); + + return err; + } + static int do_help(int argc, char **argv) { if (json_output) { @@@ -1150,12 -1218,17 +1233,17 @@@ " entries MAX_ENTRIES name NAME [flags FLAGS] \\n" " [dev NAME]\n" " %s %s dump MAP\n" - " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" - " %s %s lookup MAP key DATA\n" + " %s %s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" + " %s %s lookup MAP [key DATA]\n" " %s %s getnext MAP [key DATA]\n" " %s %s delete MAP key DATA\n" " %s %s pin MAP FILE\n" " %s %s event_pipe MAP [cpu N index M]\n" + " %s %s peek MAP\n" + " %s %s push MAP value VALUE\n" + " %s %s pop MAP\n" + " %s %s enqueue MAP value VALUE\n" + " %s %s dequeue MAP\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@@ -1173,7 -1246,8 +1261,8 @@@ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
return 0; } @@@ -1190,6 -1264,11 +1279,11 @@@ static const struct cmd cmds[] = { "pin", do_pin }, { "event_pipe", do_event_pipe }, { "create", do_create }, + { "peek", do_lookup }, + { "push", do_update }, + { "enqueue", do_update }, + { "pop", do_pop_dequeue }, + { "dequeue", do_pop_dequeue }, { 0 } };
diff --combined tools/bpf/bpftool/prog.c index b54ed82b9589,0640e9bc0ada..33ed0806ccc0 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@@ -78,14 -78,13 +78,14 @@@ static void print_boot_time(__u64 nsecs
static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd;
while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno)); @@@ -931,10 -930,9 +931,9 @@@ static int load_with_options(int argc, err = libbpf_prog_type_by_name(type, &attr.prog_type, &expected_attach_type); free(type); - if (err < 0) { - p_err("unknown program type '%s'", *argv); + if (err < 0) goto err_free_reuse_maps; - } + NEXT_ARG(); } else if (is_prefix(*argv, "map")) { void *new_map_replace; @@@ -1029,11 -1027,8 +1028,8 @@@
err = libbpf_prog_type_by_name(sec_name, &prog_type, &expected_attach_type); - if (err < 0) { - p_err("failed to guess program type based on section name %s\n", - sec_name); + if (err < 0) goto err_close_obj; - } }
bpf_program__set_ifindex(pos, ifindex); diff --combined tools/testing/selftests/bpf/test_btf.c index 91420fa83b08,179f1d8ec5bf..a51ea3f32cb8 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@@ -18,6 -18,7 +18,7 @@@ #include <unistd.h> #include <fcntl.h> #include <errno.h> + #include <assert.h> #include <bpf/libbpf.h> #include <bpf/btf.h>
@@@ -134,6 -135,12 +135,12 @@@ static struct btf_header hdr_tmpl = .hdr_len = sizeof(struct btf_header), };
+ /* several different mapv kinds(types) supported by pprint */ + enum pprint_mapv_kind_t { + PPRINT_MAPV_KIND_BASIC = 0, + PPRINT_MAPV_KIND_INT128, + }; + struct btf_raw_test { const char *descr; const char *str_sec; @@@ -156,6 -163,7 +163,7 @@@ int type_off_delta; int str_off_delta; int str_len_delta; + enum pprint_mapv_kind_t mapv_kind; };
#define BTF_STR_SEC(str) \ @@@ -1881,12 -1889,13 +1889,12 @@@ static struct btf_raw_test raw_tests[] },
{ - .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - BTF_CONST_ENC(4), /* [3] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ BTF_FUNC_PROTO_ARG_ENC(0, 1), BTF_FUNC_PROTO_ARG_ENC(0, 2), BTF_END_RAW, @@@ -1900,6 -1909,8 +1908,6 @@@ .key_type_id = 1, .value_type_id = 1, .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid type_id", },
{ @@@ -2704,6 -2715,99 +2712,99 @@@ .err_str = "Invalid member offset", },
+ { + .descr = "128-bit int", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + }, + + { + .descr = "struct, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + }, + + { + .descr = "struct, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 120, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + }, + + { + .descr = "struct, kind_flag, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + }, + + { + .descr = "struct, kind_flag, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(120, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + }, + }; /* struct btf_raw_test raw_tests[] */
static const char *get_next_str(const char *start, const char *end) @@@ -3527,6 -3631,16 +3628,16 @@@ struct pprint_mapv uint32_t bits2c:2; };
+ #ifdef __SIZEOF_INT128__ + struct pprint_mapv_int128 { + __int128 si128a; + __int128 si128b; + unsigned __int128 bits3:3; + unsigned __int128 bits80:80; + unsigned __int128 ui128; + }; + #endif + static struct btf_raw_test pprint_test_template[] = { { .raw_types = { @@@ -3718,6 -3832,35 +3829,35 @@@ .max_entries = 128 * 1024, },
+ #ifdef __SIZEOF_INT128__ + { + /* test int128 */ + .raw_types = { + /* unsigned int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* __int128 */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 128, 16), + /* unsigned __int128 */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 128, 16), + /* struct pprint_mapv_int128 */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 5), 64), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), /* si128a */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 128)), /* si128b */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(3, 256)), /* bits3 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(80, 259)), /* bits80 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(0, 384)), /* ui128 */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned int\0__int128\0unsigned __int128\0pprint_mapv_int128\0si128a\0si128b\0bits3\0bits80\0ui128"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv_int128), + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 128 * 1024, + .mapv_kind = PPRINT_MAPV_KIND_INT128, + }, + #endif + };
static struct btf_pprint_test_meta { @@@ -3784,24 -3927,108 +3924,108 @@@
};
+ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) + { + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) + return sizeof(struct pprint_mapv); + + #ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) + return sizeof(struct pprint_mapv_int128); + #endif
- static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i, + assert(0); + } + + static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, + void *mapv, uint32_t i, int num_cpus, int rounded_value_size) { int cpu;
- for (cpu = 0; cpu < num_cpus; cpu++) { - v->ui32 = i + cpu; - v->si32 = -i; - v->unused_bits2a = 3; - v->bits28 = i; - v->unused_bits2b = 3; - v->ui64 = i; - v->aenum = i & 0x03; - v->ui32b = 4; - v->bits2c = 1; - v = (void *)v + rounded_value_size; + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->ui32 = i + cpu; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; + v->ui32b = 4; + v->bits2c = 1; + v = (void *)v + rounded_value_size; + } + } + + #ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->si128a = i; + v->si128b = -i; + v->bits3 = i & 0x07; + v->bits80 = (((unsigned __int128)1) << 64) + i; + v->ui128 = (((unsigned __int128)2) << 64) + i; + v = (void *)v + rounded_value_size; + } } + #endif + } + + ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, + char *expected_line, ssize_t line_size, + bool percpu_map, unsigned int next_key, + int cpu, void *mapv) + { + ssize_t nexpected_line = -1; + + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," + "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "%u,0x%x}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + v->ui32, v->si32, + v->unused_bits2a, + v->bits28, + v->unused_bits2b, + v->ui64, + v->ui8a[0], v->ui8a[1], + v->ui8a[2], v->ui8a[3], + v->ui8a[4], v->ui8a[5], + v->ui8a[6], v->ui8a[7], + pprint_enum_str[v->aenum], + v->ui32b, + v->bits2c); + } + + #ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {0x%lx,0x%lx,0x%lx," + "0x%lx%016lx,0x%lx%016lx}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + (uint64_t)v->si128a, + (uint64_t)v->si128b, + (uint64_t)v->bits3, + (uint64_t)(v->bits80 >> 64), + (uint64_t)v->bits80, + (uint64_t)(v->ui128 >> 64), + (uint64_t)v->ui128); + } + #endif + + return nexpected_line; }
static int check_line(const char *expected_line, int nexpected_line, @@@ -3825,10 -4052,10 +4049,10 @@@ static int do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; + enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; struct bpf_create_map_attr create_attr = {}; bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; - struct pprint_mapv *mapv = NULL; unsigned int key, nr_read_elems; int map_fd = -1, btf_fd = -1; unsigned int raw_btf_size; @@@ -3837,6 -4064,7 +4061,7 @@@ char pin_path[255]; size_t line_len = 0; char *line = NULL; + void *mapv = NULL; uint8_t *raw_btf; ssize_t nread;
@@@ -3889,7 -4117,7 +4114,7 @@@
percpu_map = test->percpu_map; num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; - rounded_value_size = round_up(sizeof(struct pprint_mapv), 8); + rounded_value_size = round_up(get_pprint_mapv_size(mapv_kind), 8); mapv = calloc(num_cpus, rounded_value_size); if (CHECK(!mapv, "mapv allocation failure")) { err = -1; @@@ -3897,7 -4125,7 +4122,7 @@@ }
for (key = 0; key < test->max_entries; key++) { - set_pprint_mapv(mapv, key, num_cpus, rounded_value_size); + set_pprint_mapv(mapv_kind, mapv, key, num_cpus, rounded_value_size); bpf_map_update_elem(map_fd, &key, mapv, 0); }
@@@ -3921,13 -4149,13 +4146,13 @@@ ordered_map = test->ordered_map; lossless_map = test->lossless_map; do { - struct pprint_mapv *cmapv; ssize_t nexpected_line; unsigned int next_key; + void *cmapv; int cpu;
next_key = ordered_map ? nr_read_elems : atoi(line); - set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size); + set_pprint_mapv(mapv_kind, mapv, next_key, num_cpus, rounded_value_size); cmapv = mapv;
for (cpu = 0; cpu < num_cpus; cpu++) { @@@ -3960,31 -4188,16 +4185,16 @@@ break; }
- nexpected_line = snprintf(expected_line, sizeof(expected_line), - "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," - "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," - "%u,0x%x}\n", - percpu_map ? "\tcpu" : "", - percpu_map ? cpu : next_key, - cmapv->ui32, cmapv->si32, - cmapv->unused_bits2a, - cmapv->bits28, - cmapv->unused_bits2b, - cmapv->ui64, - cmapv->ui8a[0], cmapv->ui8a[1], - cmapv->ui8a[2], cmapv->ui8a[3], - cmapv->ui8a[4], cmapv->ui8a[5], - cmapv->ui8a[6], cmapv->ui8a[7], - pprint_enum_str[cmapv->aenum], - cmapv->ui32b, - cmapv->bits2c); - + nexpected_line = get_pprint_expected_line(mapv_kind, expected_line, + sizeof(expected_line), + percpu_map, next_key, + cpu, cmapv); err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); if (err == -1) goto done;
- cmapv = (void *)cmapv + rounded_value_size; + cmapv = cmapv + rounded_value_size; }
if (percpu_map) { @@@ -4080,6 -4293,10 +4290,10 @@@ static struct prog_info_raw_test __u32 line_info_rec_size; __u32 nr_jited_ksyms; bool expected_prog_load_failure; + __u32 dead_code_cnt; + __u32 dead_code_mask; + __u32 dead_func_cnt; + __u32 dead_func_mask; } info_raw_tests[] = { { .descr = "func_type (main func + one sub)", @@@ -4506,6 -4723,369 +4720,369 @@@ .expected_prog_load_failure = true, },
+{
+	.descr = "line_info (dead start)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0/* dead jmp */\0int a=1;\0int b=2;\0return a + b;\0return a + b;"),
+	.insns = {
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_MOV64_IMM(BPF_REG_1, 2),
+		BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 0,
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9),
+		BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 6),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 1,
+	.dead_code_cnt = 1,
+	.dead_code_mask = 0x01,
+},
+
+{
+	.descr = "line_info (dead end)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0/* dead jmp */\0return a + b;\0/* dead exit */"),
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_MOV64_IMM(BPF_REG_1, 2),
+		BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1),
+		BPF_EXIT_INSN(),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 0,
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 12),
+		BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 11),
+		BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 10),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 9),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 8),
+		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 6, 7),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 1,
+	.dead_code_cnt = 2,
+	.dead_code_mask = 0x28,
+},
+
+{
+	.descr = "line_info (dead code + subprog + func_info)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0/* dead jmp */"
+		    "\0/* dead */\0/* dead */\0/* dead */\0/* dead */"
+		    "\0/* dead */\0/* dead */\0/* dead */\0/* dead */"
+		    "\0return func(a);\0b+=1;\0return b;"),
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 8),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_CALL_REL(1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 2,
+	.func_info_rec_size = 8,
+	.func_info = { {0, 4}, {14, 3} },
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9),
+		BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9),
+		BPF_LINE_INFO_ENC(14, 0, NAME_TBD, 3, 8),
+		BPF_LINE_INFO_ENC(16, 0, NAME_TBD, 4, 7),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 2,
+	.dead_code_cnt = 9,
+	.dead_code_mask = 0x3fe,
+},
+
+{
+	.descr = "line_info (dead subprog)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [5] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */"
+		    "\0return 0;\0return 0;\0/* dead */\0/* dead */"
+		    "\0/* dead */\0return bla + 1;\0return bla + 1;"
+		    "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"),
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+		BPF_CALL_REL(3),
+		BPF_CALL_REL(5),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_CALL_REL(1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_0, 2),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 3,
+	.func_info_rec_size = 8,
+	.func_info = { {0, 4}, {6, 3}, {9, 5} },
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 2,
+	.dead_code_cnt = 3,
+	.dead_code_mask = 0x70,
+	.dead_func_cnt = 1,
+	.dead_func_mask = 0x2,
+},
+
+{
+	.descr = "line_info (dead last subprog)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [5] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0x\0dead\0main\0int a=1+1;\0/* live call */"
+		    "\0return 0;\0/* dead */\0/* dead */"),
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+		BPF_CALL_REL(2),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 2,
+	.func_info_rec_size = 8,
+	.func_info = { {0, 4}, {5, 3} },
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 1,
+	.dead_code_cnt = 2,
+	.dead_code_mask = 0x18,
+	.dead_func_cnt = 1,
+	.dead_func_mask = 0x2,
+},
+
+{
+	.descr = "line_info (dead subprog + dead start)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [5] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* dead */"
+		    "\0return 0;\0return 0;\0return 0;"
+		    "\0/* dead */\0/* dead */\0/* dead */\0/* dead */"
+		    "\0return b + 1;\0return b + 1;\0return b + 1;"),
+	.insns = {
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+		BPF_CALL_REL(3),
+		BPF_CALL_REL(5),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_CALL_REL(1),
+		BPF_EXIT_INSN(),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+		BPF_MOV64_REG(BPF_REG_0, 2),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 3,
+	.func_info_rec_size = 8,
+	.func_info = { {0, 4}, {7, 3}, {10, 5} },
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9),
+		BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9),
+		BPF_LINE_INFO_ENC(13, 0, NAME_TBD, 2, 9),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 2,
+	.dead_code_cnt = 5,
+	.dead_code_mask = 0x1e2,
+	.dead_func_cnt = 1,
+	.dead_func_mask = 0x2,
+},
+
+{
+	.descr = "line_info (dead subprog + dead start w/ move)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [5] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */"
+		    "\0return 0;\0return 0;\0/* dead */\0/* dead */"
+		    "\0/* dead */\0return bla + 1;\0return bla + 1;"
+		    "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"),
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+		BPF_CALL_REL(3),
+		BPF_CALL_REL(5),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_CALL_REL(1),
+		BPF_EXIT_INSN(),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+		BPF_MOV64_REG(BPF_REG_0, 2),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 3,
+	.func_info_rec_size = 8,
+	.func_info = { {0, 4}, {6, 3}, {9, 5} },
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 2,
+	.dead_code_cnt = 3,
+	.dead_code_mask = 0x70,
+	.dead_func_cnt = 1,
+	.dead_func_mask = 0x2,
+},
+
+{
+	.descr = "line_info (dead end + subprog start w/ no linfo)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"),
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 3),
+		BPF_CALL_REL(3),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		BPF_EXIT_INSN(),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info_cnt = 2,
+	.func_info_rec_size = 8,
+	.func_info = { {0, 3}, {6, 4}, },
+	.line_info = {
+		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
+		BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10),
+		BTF_END_RAW,
+	},
+	.line_info_rec_size = sizeof(struct bpf_line_info),
+	.nr_jited_ksyms = 2,
+},
+
};
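[Editor's note on the test vectors above: the new dead_code_mask and dead_func_mask fields are bitmaps over the original line_info and func_info arrays. Bit i set means entry i annotates code the verifier's dead-code elimination is expected to remove, and the matching *_cnt field is simply the number of set bits. A minimal standalone sketch of that reading, assuming a hypothetical count_surviving() helper that is not part of the patch:

	/* Sketch only: bit i of dead_mask marks record i as covering an
	 * instruction the verifier removes, so it must not come back from
	 * bpf_obj_get_info_by_fd().  count_surviving() is hypothetical.
	 */
	#include <assert.h>

	static unsigned int count_surviving(unsigned int total,
					    unsigned int dead_mask)
	{
		unsigned int i, alive = 0;

		for (i = 0; i < total; i++)
			if (!(dead_mask & (1u << i)))
				alive++;
		return alive;
	}

	int main(void)
	{
		/* "line_info (dead end)": 6 records, mask 0x28 kills
		 * entries 3 and 5 (.dead_code_cnt = 2), so 4 survive.
		 */
		assert(count_surviving(6, 0x28) == 4);
		return 0;
	}
]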
static size_t probe_prog_length(const struct bpf_insn *fp)
@@@ -4565,6 -5145,7 +5142,7 @@@ static int test_get_finfo(const struct
	struct bpf_func_info *finfo;
	__u32 info_len, rec_size, i;
	void *func_info = NULL;
+	__u32 nr_func_info;
	int err;
	/* get necessary lens */
@@@ -4574,7 -5155,8 +5152,8 @@@
		fprintf(stderr, "%s\n", btf_log_buf);
		return -1;
	}
-	if (CHECK(info.nr_func_info != test->func_info_cnt,
+	nr_func_info = test->func_info_cnt - test->dead_func_cnt;
+	if (CHECK(info.nr_func_info != nr_func_info,
		  "incorrect info.nr_func_info (1st) %d",
		  info.nr_func_info)) {
		return -1;
@@@ -4595,7 -5177,7 +5174,7 @@@
	/* reset info to only retrieve func_info related data */
	memset(&info, 0, sizeof(info));
-	info.nr_func_info = test->func_info_cnt;
+	info.nr_func_info = nr_func_info;
	info.func_info_rec_size = rec_size;
	info.func_info = ptr_to_u64(func_info);
	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
@@@ -4604,7 -5186,7 +5183,7 @@@
		err = -1;
		goto done;
	}
-	if (CHECK(info.nr_func_info != test->func_info_cnt,
+	if (CHECK(info.nr_func_info != nr_func_info,
		  "incorrect info.nr_func_info (2nd) %d",
		  info.nr_func_info)) {
		err = -1;
@@@ -4618,7 -5200,9 +5197,9 @@@
	}
	finfo = func_info;
-	for (i = 0; i < test->func_info_cnt; i++) {
+	for (i = 0; i < nr_func_info; i++) {
+		if (test->dead_func_mask & (1 << i))
+			continue;
		if (CHECK(finfo->type_id != test->func_info[i][1],
			  "incorrect func_type %u expected %u",
			  finfo->type_id, test->func_info[i][1])) {
@@@ -4647,6 -5231,7 +5228,7 @@@ static int test_get_linfo(const struct
	struct bpf_prog_info info = {};
	__u32 *jited_func_lens = NULL;
	__u64 cur_func_ksyms;
+	__u32 dead_insns;
	int err;
	jited_cnt = cnt;
@@@ -4655,7 -5240,7 +5237,7 @@@
	if (test->nr_jited_ksyms)
		nr_jited_ksyms = test->nr_jited_ksyms;
	else
-		nr_jited_ksyms = test->func_info_cnt;
+		nr_jited_ksyms = test->func_info_cnt - test->dead_func_cnt;
	nr_jited_func_lens = nr_jited_ksyms;
	info_len = sizeof(struct bpf_prog_info);
@@@ -4757,12 -5342,20 +5339,20 @@@
		goto done;
	}
+	dead_insns = 0;
+	while (test->dead_code_mask & (1 << dead_insns))
+		dead_insns++;
+
	CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u",
	      linfo[0].insn_off);
	for (i = 1; i < cnt; i++) {
		const struct bpf_line_info *expected_linfo;
-		expected_linfo = patched_linfo + (i * test->line_info_rec_size);
+		while (test->dead_code_mask & (1 << (i + dead_insns)))
+			dead_insns++;
+
+		expected_linfo = patched_linfo +
+			((i + dead_insns) * test->line_info_rec_size);
		if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off,
			  "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u",
			  i, linfo[i].insn_off,
@@@ -4920,7 -5513,9 +5510,9 @@@ static int do_test_info_raw(unsigned in
	if (err)
		goto done;
-	err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd);
+	err = test_get_linfo(test, patched_linfo,
+			     attr.line_info_cnt - test->dead_code_cnt,
+			     prog_fd);
	if (err)
		goto done;
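[Editor's note: the dead_insns walk added to test_get_linfo() above can be read as a mapping from position i in the kernel-reported (pruned) line_info array back to position i + dead_insns in the original array, skipping mask bits for eliminated records along the way. A self-contained restatement of that arithmetic, with orig_index() as a made-up name for illustration only:

	/* Sketch of the index arithmetic in the loop above: map index i of
	 * the pruned line_info array back to its index in the original
	 * array by stepping over dead entries recorded in dead_mask.
	 */
	static unsigned int orig_index(unsigned int i, unsigned int dead_mask)
	{
		unsigned int dead_insns = 0, k;

		/* dead records before the first surviving one */
		while (dead_mask & (1u << dead_insns))
			dead_insns++;

		/* for each later survivor, skip dead records in between */
		for (k = 1; k <= i; k++)
			while (dead_mask & (1u << (k + dead_insns)))
				dead_insns++;

		return i + dead_insns;
	}

With the "line_info (dead end)" test's mask of 0x28, for example, orig_index() maps pruned indices 0, 1, 2, 3 to original records 0, 1, 2 and 4, which is exactly the record test_get_linfo() compares against after the patch.]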