The following commit has been merged in the master branch:

commit 4e8f2fc1a55d543717efb70e170b09e773d0542b
Merge: 158f323b9868b59967ad96957c4ca388161be321 1b1bc42c1692e9b62756323c675a44cb1a1f9dbd
Author: David S. Miller <davem@davemloft.net>
Date:   Sat Jan 28 10:33:06 2017 -0500
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Two trivial conflicts from overlapping changes in MPLS and mlx5.
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --combined MAINTAINERS index d76fccd,5f10c28..cc106f7 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -976,6 -976,7 +976,7 @@@ M: Russell King <linux@armlinux.org.uk L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.armlinux.org.uk/ S: Maintained + T: git git://git.armlinux.org.uk/~rmk/linux-arm.git F: arch/arm/
ARM SUB-ARCHITECTURES @@@ -1153,6 -1154,7 +1154,7 @@@ ARM/CLKDEV SUPPOR M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained + T: git git://git.armlinux.org.uk/~rmk/linux-arm.git clkdev F: arch/arm/include/asm/clkdev.h F: drivers/clk/clkdev.c
@@@ -1688,6 -1690,7 +1690,7 @@@ M: Krzysztof Kozlowski <krzk@kernel.org R: Javier Martinez Canillas <javier@osg.samsung.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) + Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/ S: Maintained F: arch/arm/boot/dts/s3c* F: arch/arm/boot/dts/s5p* @@@ -2596,12 -2599,6 +2599,12 @@@ L: netdev@vger.kernel.or S: Supported F: drivers/net/ethernet/broadcom/bnx2x/
+BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER +M: Michael Chan <michael.chan@broadcom.com> +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/broadcom/bnxt/ + BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE M: Florian Fainelli <f.fainelli@gmail.com> M: Ray Jui <rjui@broadcom.com> @@@ -3570,7 -3567,7 +3573,7 @@@ F: drivers/infiniband/hw/cxgb3 F: include/uapi/rdma/cxgb3-abi.h
CXGB4 ETHERNET DRIVER (CXGB4) - M: Hariprasad S <hariprasad@chelsio.com> + M: Ganesh Goudar <ganeshgr@chelsio.com> L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@@ -4103,12 -4100,18 +4106,18 @@@ F: drivers/gpu/drm/bridge
DRM DRIVER FOR BOCHS VIRTUAL GPU M: Gerd Hoffmann <kraxel@redhat.com> - S: Odd Fixes + L: virtualization@lists.linux-foundation.org + T: git git://git.kraxel.org/linux drm-qemu + S: Maintained F: drivers/gpu/drm/bochs/
DRM DRIVER FOR QEMU'S CIRRUS DEVICE M: Dave Airlie <airlied@redhat.com> - S: Odd Fixes + M: Gerd Hoffmann <kraxel@redhat.com> + L: virtualization@lists.linux-foundation.org + T: git git://git.kraxel.org/linux drm-qemu + S: Obsolete + W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ F: drivers/gpu/drm/cirrus/
RADEON and AMDGPU DRM DRIVERS @@@ -4150,7 -4153,7 +4159,7 @@@ F: Documentation/gpu/i915.rs INTEL GVT-g DRIVERS (Intel GPU Virtualization) M: Zhenyu Wang <zhenyuw@linux.intel.com> M: Zhi Wang <zhi.a.wang@intel.com> - L: igvt-g-dev@lists.01.org + L: intel-gvt-dev@lists.freedesktop.org L: intel-gfx@lists.freedesktop.org W: https://01.org/igvt-g T: git https://github.com/01org/gvt-linux.git @@@ -4301,7 -4304,10 +4310,10 @@@ F: Documentation/devicetree/bindings/di
DRM DRIVER FOR QXL VIRTUAL GPU M: Dave Airlie <airlied@redhat.com> - S: Odd Fixes + M: Gerd Hoffmann <kraxel@redhat.com> + L: virtualization@lists.linux-foundation.org + T: git git://git.kraxel.org/linux drm-qemu + S: Maintained F: drivers/gpu/drm/qxl/ F: include/uapi/drm/qxl_drm.h
@@@ -7703,8 -7709,10 +7715,10 @@@ F: drivers/net/dsa/mv88e6xxx F: Documentation/devicetree/bindings/net/dsa/marvell.txt
MARVELL ARMADA DRM SUPPORT - M: Russell King <rmk+kernel@armlinux.org.uk> + M: Russell King <linux@armlinux.org.uk> S: Maintained + T: git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-armada-devel + T: git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-armada-fixes F: drivers/gpu/drm/armada/ F: include/uapi/drm/armada_drm.h F: Documentation/devicetree/bindings/display/armada/ @@@ -8909,8 -8917,10 +8923,10 @@@ S: Supporte F: drivers/nfc/nxp-nci
NXP TDA998X DRM DRIVER - M: Russell King <rmk+kernel@armlinux.org.uk> + M: Russell King <linux@armlinux.org.uk> S: Supported + T: git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-devel + T: git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-fixes F: drivers/gpu/drm/i2c/tda998x_drv.c F: include/drm/i2c/tda998x.h
@@@ -9957,13 -9967,6 +9973,13 @@@ L: linuxppc-dev@lists.ozlabs.or S: Maintained F: drivers/block/ps3vram.c
+PSAMPLE PACKET SAMPLING SUPPORT: +M: Yotam Gigi <yotamg@mellanox.com> +S: Maintained +F: net/psample +F: include/net/psample.h +F: include/uapi/linux/psample.h + PSTORE FILESYSTEM M: Anton Vorontsov <anton@enomsg.org> M: Colin Cross <ccross@android.com> @@@ -10857,13 -10860,6 +10873,13 @@@ S: Maintaine F: drivers/staging/media/st-cec/ F: Documentation/devicetree/bindings/media/stih-cec.txt
+SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS +M: Ursula Braun <ubraun@linux.vnet.ibm.com> +L: linux-s390@vger.kernel.org +W: http://www.ibm.com/developerworks/linux/linux390/ +S: Supported +F: net/smc/ + SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar <vireshk@kernel.org> M: Andy Shevchenko <andriy.shevchenko@linux.intel.com> @@@ -10872,6 -10868,13 +10888,6 @@@ F: include/linux/dma/dw. F: include/linux/platform_data/dma-dw.h F: drivers/dma/dw/
-SYNOPSYS DESIGNWARE ETHERNET QOS 4.10a driver -M: Lars Persson <lars.persson@axis.com> -L: netdev@vger.kernel.org -S: Supported -F: Documentation/devicetree/bindings/net/snps,dwc-qos-ethernet.txt -F: drivers/net/ethernet/synopsys/dwc_eth_qos.c - SYNOPSYS DESIGNWARE I2C DRIVER M: Jarkko Nikula <jarkko.nikula@linux.intel.com> R: Andy Shevchenko <andriy.shevchenko@linux.intel.com> @@@ -11314,13 -11317,6 +11330,13 @@@ F: arch/arm/mach-s3c24xx/mach-bast. F: arch/arm/mach-s3c24xx/bast-ide.c F: arch/arm/mach-s3c24xx/bast-irq.c
+SIPHASH PRF ROUTINES +M: Jason A. Donenfeld <Jason@zx2c4.com> +S: Maintained +F: lib/siphash.c +F: lib/test_siphash.c +F: include/linux/siphash.h + TI DAVINCI MACHINE SUPPORT M: Sekhar Nori <nsekhar@ti.com> M: Kevin Hilman <khilman@kernel.org> @@@ -13105,6 -13101,7 +13121,7 @@@ M: David Airlie <airlied@linux.ie M: Gerd Hoffmann <kraxel@redhat.com> L: dri-devel@lists.freedesktop.org L: virtualization@lists.linux-foundation.org + T: git git://git.kraxel.org/linux drm-qemu S: Maintained F: drivers/gpu/drm/virtio/ F: include/uapi/linux/virtio_gpu.h @@@ -13456,6 -13453,7 +13473,7 @@@ F: arch/x86
X86 PLATFORM DRIVERS M: Darren Hart <dvhart@infradead.org> + M: Andy Shevchenko <andy@infradead.org> L: platform-driver-x86@vger.kernel.org T: git git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git S: Maintained @@@ -13627,6 -13625,7 +13645,7 @@@ F: drivers/net/hamradio/z8530.
ZBUD COMPRESSED PAGE ALLOCATOR M: Seth Jennings <sjenning@redhat.com> + M: Dan Streetman <ddstreet@ieee.org> L: linux-mm@kvack.org S: Maintained F: mm/zbud.c @@@ -13682,6 -13681,7 +13701,7 @@@ F: Documentation/vm/zsmalloc.tx
ZSWAP COMPRESSED SWAP CACHING M: Seth Jennings <sjenning@redhat.com> + M: Dan Streetman <ddstreet@ieee.org> L: linux-mm@kvack.org S: Maintained F: mm/zswap.c diff --combined arch/arm/boot/dts/dra7.dtsi index 3a8579c,5ba1616..3e1f750 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@@ -1378,6 -1378,7 +1378,7 @@@ phy-names = "sata-phy"; clocks = <&sata_ref_clk>; ti,hwmods = "sata"; + ports-implemented = <0x1>; };
rtc: rtc@48838000 { @@@ -1708,6 -1709,7 +1709,6 @@@ cpdma_channels = <8>; ale_entries = <1024>; bd_ram_size = <0x2000>; - no_bd_ram = <0>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; diff --combined arch/arm/configs/multi_v7_defconfig index 64f4419,028d2b7..b416abc --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@@ -253,8 -253,7 +253,8 @@@ CONFIG_R8169= CONFIG_SH_ETH=y CONFIG_SMSC911X=y CONFIG_STMMAC_ETH=y -CONFIG_SYNOPSYS_DWC_ETH_QOS=y +CONFIG_STMMAC_PLATFORM=y +CONFIG_DWMAC_DWC_QOS_ETH=y CONFIG_TI_CPSW=y CONFIG_XILINX_EMACLITE=y CONFIG_AT803X_PHY=y @@@ -472,7 -471,7 +472,7 @@@ CONFIG_MESON_WATCHDOG= CONFIG_DW_WATCHDOG=y CONFIG_DIGICOLOR_WATCHDOG=y CONFIG_BCM2835_WDT=y - CONFIG_BCM47XX_WATCHDOG=y + CONFIG_BCM47XX_WDT=y CONFIG_BCM7038_WDT=m CONFIG_BCM_KONA_WDT=y CONFIG_MFD_ACT8945A=y @@@ -894,7 -893,7 +894,7 @@@ CONFIG_BCM2835_MBOX= CONFIG_RASPBERRYPI_FIRMWARE=y CONFIG_EFI_VARS=m CONFIG_EFI_CAPSULE_LOADER=m - CONFIG_CONFIG_BCM47XX_NVRAM=y + CONFIG_BCM47XX_NVRAM=y CONFIG_BCM47XX_SPROM=y CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y diff --combined drivers/isdn/hardware/eicon/message.c index 5dcfa29,296f141..3b11422 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@@ -147,7 -147,7 +147,7 @@@ static word plci_remove_check(PLCI *) static void listen_check(DIVA_CAPI_ADAPTER *); static byte AddInfo(byte **, byte **, byte *, byte *); static byte getChannel(API_PARSE *); -static void IndParse(PLCI *, word *, byte **, byte); +static void IndParse(PLCI *, const word *, byte **, byte); static byte ie_compare(byte *, byte *); static word find_cip(DIVA_CAPI_ADAPTER *, byte *, byte *); static word CPN_filter_ok(byte *cpn, DIVA_CAPI_ADAPTER *, word); @@@ -4858,7 -4858,7 +4858,7 @@@ static void sig_ind(PLCI *plci /* included before the ESC_MSGTYPE and MAXPARMSIDS has to be incremented */ /* SMSG is situated at the end because its 0 (for compatibility reasons */ /* (see Info_Mask Bit 4, first IE. then the message type) */ - word parms_id[] = + static const word parms_id[] = {MAXPARMSIDS, CPN, 0xff, DSA, OSA, BC, LLC, HLC, ESC_CAUSE, DSP, DT, CHA, UUI, CONG_RR, CONG_RNR, ESC_CHI, KEY, CHI, CAU, ESC_LAW, RDN, RDX, CONN_NR, RIN, NI, CAI, ESC_CR, @@@ -4866,12 -4866,12 +4866,12 @@@ /* 14 FTY repl by ESC_CHI */ /* 18 PI repl by ESC_LAW */ /* removed OAD changed to 0xff for future use, OAD is multiIE now */ - word multi_fac_id[] = {1, FTY}; - word multi_pi_id[] = {1, PI}; - word multi_CiPN_id[] = {1, OAD}; - word multi_ssext_id[] = {1, ESC_SSEXT}; + static const word multi_fac_id[] = {1, FTY}; + static const word multi_pi_id[] = {1, PI}; + static const word multi_CiPN_id[] = {1, OAD}; + static const word multi_ssext_id[] = {1, ESC_SSEXT};
- word multi_vswitch_id[] = {1, ESC_VSWITCH}; + static const word multi_vswitch_id[] = {1, ESC_VSWITCH};
byte *cau; word ncci; @@@ -8924,7 -8924,7 +8924,7 @@@ static void listen_check(DIVA_CAPI_ADAP /* functions for all parameters sent in INDs */ /*------------------------------------------------------------------*/
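The static const conversions above are not just style: a plain `word parms_id[] = {...}` local is rebuilt on the stack on every call to sig_ind(), while a static const table is emitted once in read-only data and can be handed to the const-qualified IndParse() below. A minimal illustration (names hypothetical):

	/* initialized on every call */
	static int lookup_stack(int i)
	{
		const int tbl[] = {10, 20, 30};
		return tbl[i];
	}

	/* one copy in .rodata, no per-call initialization */
	static int lookup_rodata(int i)
	{
		static const int tbl[] = {10, 20, 30};
		return tbl[i];
	}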
-static void IndParse(PLCI *plci, word *parms_id, byte **parms, byte multiIEsize) +static void IndParse(PLCI *plci, const word *parms_id, byte **parms, byte multiIEsize) { word ploc; /* points to current location within packet */ byte w; @@@ -11297,7 -11297,8 +11297,8 @@@ static void mixer_notify_update(PLCI *p ((CAPI_MSG *) msg)->header.ncci = 0; ((CAPI_MSG *) msg)->info.facility_req.Selector = SELECTOR_LINE_INTERCONNECT; ((CAPI_MSG *) msg)->info.facility_req.structs[0] = 3; - PUT_WORD(&(((CAPI_MSG *) msg)->info.facility_req.structs[1]), LI_REQ_SILENT_UPDATE); + ((CAPI_MSG *) msg)->info.facility_req.structs[1] = LI_REQ_SILENT_UPDATE & 0xff; + ((CAPI_MSG *) msg)->info.facility_req.structs[2] = LI_REQ_SILENT_UPDATE >> 8; ((CAPI_MSG *) msg)->info.facility_req.structs[3] = 0; w = api_put(notify_plci->appl, (CAPI_MSG *) msg); if (w != _QUEUE_FULL) diff --combined drivers/net/ethernet/amd/xgbe/xgbe-drv.c index f8648e4,1c87cc2..3aa457c --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@@ -1070,7 -1070,9 +1070,9 @@@ static int xgbe_start(struct xgbe_prv_d
DBGPR("-->xgbe_start\n");
- hw_if->init(pdata); + ret = hw_if->init(pdata); + if (ret) + return ret;
xgbe_napi_enable(pdata, 1);
@@@ -1759,8 -1761,8 +1761,8 @@@ static void xgbe_tx_timeout(struct net_ schedule_work(&pdata->restart_work); }
-static struct rtnl_link_stats64 *xgbe_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *s) +static void xgbe_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *s) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_mmc_stats *pstats = &pdata->mmc_stats; @@@ -1786,6 -1788,8 +1788,6 @@@ s->tx_dropped = netdev->stats.tx_dropped;
DBGPR("<--%s\n", __func__); - - return s; }
static int xgbe_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, diff --combined drivers/net/ethernet/atheros/alx/main.c index 4c80e06,7dcc907..391bb5c --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@@ -685,8 -685,6 +685,6 @@@ static int alx_alloc_rings(struct alx_p return -ENOMEM; }
- alx_reinit_rings(alx); - return 0; }
@@@ -703,7 -701,7 +701,7 @@@ static void alx_free_rings(struct alx_p if (alx->qnapi[0] && alx->qnapi[0]->rxq) kfree(alx->qnapi[0]->rxq->bufs);
- if (!alx->descmem.virt) + if (alx->descmem.virt) dma_free_coherent(&alx->hw.pdev->dev, alx->descmem.size, alx->descmem.virt, @@@ -984,6 -982,7 +982,7 @@@ static int alx_realloc_resources(struc alx_free_rings(alx); alx_free_napis(alx); alx_disable_advanced_intr(alx); + alx_init_intr(alx, false);
err = alx_alloc_napis(alx); if (err) @@@ -1241,6 -1240,12 +1240,12 @@@ static int __alx_open(struct alx_priv * if (err) goto out_free_rings;
+ /* must be called after alx_request_irq because the chip stops working + * if we copy the dma addresses in alx_init_ring_ptrs twice when + * requesting msi-x interrupts failed + */ + alx_reinit_rings(alx); + netif_set_real_num_tx_queues(alx->dev, alx->num_txq); netif_set_real_num_rx_queues(alx->dev, alx->num_rxq);
@@@ -1643,8 -1648,8 +1648,8 @@@ static void alx_poll_controller(struct } #endif
-static struct rtnl_link_stats64 *alx_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *net_stats) +static void alx_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *net_stats) { struct alx_priv *alx = netdev_priv(dev); struct alx_hw_stats *hw_stats = &alx->hw.stats; @@@ -1688,6 -1693,8 +1693,6 @@@ net_stats->rx_packets = hw_stats->rx_ok + net_stats->rx_errors;
spin_unlock(&alx->stats_lock); - - return net_stats; }
static const struct net_device_ops alx_netdev_ops = { @@@ -1816,7 -1823,6 +1821,7 @@@ static int alx_probe(struct pci_dev *pd
netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | + NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_TSO6;
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2b46f9b,4fcc6a8..3d83b90 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@@ -39,6 -39,9 +39,6 @@@ #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/udp_tunnel.h> -#ifdef CONFIG_NET_RX_BUSY_POLL -#include <net/busy_poll.h> -#endif #include <linux/workqueue.h> #include <linux/prefetch.h> #include <linux/cache.h> @@@ -1096,7 -1099,7 +1096,7 @@@ static struct sk_buff *bnxt_gro_func_57 { #ifdef CONFIG_INET struct tcphdr *th; - int len, nw_off, tcp_opt_len; + int len, nw_off, tcp_opt_len = 0;
if (tcp_ts) tcp_opt_len = 12; @@@ -1127,6 -1130,7 +1127,6 @@@ dev_kfree_skb_any(skb); return NULL; } - tcp_gro_complete(skb);
if (nw_off) { /* tunnel */ struct udphdr *uh = NULL; @@@ -1176,8 -1180,6 +1176,8 @@@ static inline struct sk_buff *bnxt_gro_ RX_TPA_END_CMP_PAYLOAD_OFFSET) >> RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT; skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb); + if (likely(skb)) + tcp_gro_complete(skb); #endif return skb; } @@@ -1354,7 -1356,11 +1354,7 @@@ static int bnxt_rx_pkt(struct bnxt *bp rc = -ENOMEM; if (likely(skb)) { skb_record_rx_queue(skb, bnapi->index); - skb_mark_napi_id(skb, &bnapi->napi); - if (bnxt_busy_polling(bnapi)) - netif_receive_skb(skb); - else - napi_gro_receive(&bnapi->napi, skb); + napi_gro_receive(&bnapi->napi, skb); rc = 1; } goto next_rx_no_prod; @@@ -1454,7 -1460,11 +1454,7 @@@ }
skb_record_rx_queue(skb, bnapi->index); - skb_mark_napi_id(skb, &bnapi->napi); - if (bnxt_busy_polling(bnapi)) - netif_receive_skb(skb); - else - napi_gro_receive(&bnapi->napi, skb); + napi_gro_receive(&bnapi->napi, skb); rc = 1;
next_rx: @@@ -1772,6 -1782,9 +1772,6 @@@ static int bnxt_poll(struct napi_struc struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int work_done = 0;
- if (!bnxt_lock_napi(bnapi)) - return budget; - while (1) { work_done += bnxt_poll_work(bp, bnapi, budget - work_done);
@@@ -1779,16 -1792,42 +1779,16 @@@ break;
if (!bnxt_has_work(bp, cpr)) { - napi_complete(napi); - BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons); + if (napi_complete_done(napi, work_done)) + BNXT_CP_DB_REARM(cpr->cp_doorbell, + cpr->cp_raw_cons); break; } } mmiowb(); - bnxt_unlock_napi(bnapi); return work_done; }
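The poll-loop rewrite above is the standard NAPI completion idiom that replaces open-coded napi_complete(): napi_complete_done() reports how much work was done and returns false when the core wants polling to continue (for instance under busy polling), in which case the driver must not re-arm its interrupt. Sketched for a hypothetical foo driver:

	static int foo_poll(struct napi_struct *napi, int budget)
	{
		struct foo_ring *ring = container_of(napi, struct foo_ring, napi);
		int work_done = foo_clean_ring(ring, budget);	/* hypothetical */

		/* re-enable the ring IRQ only if NAPI really completed */
		if (work_done < budget && napi_complete_done(napi, work_done))
			foo_rearm_irq(ring);			/* hypothetical */

		return work_done;
	}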
-#ifdef CONFIG_NET_RX_BUSY_POLL -static int bnxt_busy_poll(struct napi_struct *napi) -{ - struct bnxt_napi *bnapi = container_of(napi, struct bnxt_napi, napi); - struct bnxt *bp = bnapi->bp; - struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; - int rx_work, budget = 4; - - if (atomic_read(&bp->intr_sem) != 0) - return LL_FLUSH_FAILED; - - if (!bp->link_info.link_up) - return LL_FLUSH_FAILED; - - if (!bnxt_lock_poll(bnapi)) - return LL_FLUSH_BUSY; - - rx_work = bnxt_poll_work(bp, bnapi, budget); - - BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons); - - bnxt_unlock_poll(bnapi); - return rx_work; -} -#endif - static void bnxt_free_tx_skbs(struct bnxt *bp) { int i, max_idx; @@@ -2467,8 -2506,6 +2467,8 @@@ static int bnxt_calc_nr_ring_pages(u32 static void bnxt_set_tpa_flags(struct bnxt *bp) { bp->flags &= ~BNXT_FLAG_TPA; + if (bp->flags & BNXT_FLAG_NO_AGG_RINGS) + return; if (bp->dev->features & NETIF_F_LRO) bp->flags |= BNXT_FLAG_LRO; if (bp->dev->features & NETIF_F_GRO) @@@ -2498,7 -2535,7 +2498,7 @@@ void bnxt_set_ring_params(struct bnxt * agg_factor = min_t(u32, 4, 65536 / BNXT_RX_PAGE_SIZE);
bp->flags &= ~BNXT_FLAG_JUMBO; - if (rx_space > PAGE_SIZE) { + if (rx_space > PAGE_SIZE && !(bp->flags & BNXT_FLAG_NO_AGG_RINGS)) { u32 jumbo_factor;
bp->flags |= BNXT_FLAG_JUMBO; @@@ -2632,10 -2669,6 +2632,10 @@@ static int bnxt_alloc_vnic_attributes(s goto out; }
+ if ((bp->flags & BNXT_FLAG_NEW_RSS_CAP) && + !(vnic->flags & BNXT_VNIC_RSS_FLAG)) + continue; + /* Allocate rss table and hash key */ vnic->rss_table = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &vnic->rss_table_dma_addr, @@@ -2960,45 -2993,6 +2960,45 @@@ alloc_mem_err return rc; }
+static void bnxt_disable_int(struct bnxt *bp) +{ + int i; + + if (!bp->bnapi) + return; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + + BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); + } +} + +static void bnxt_disable_int_sync(struct bnxt *bp) +{ + int i; + + atomic_inc(&bp->intr_sem); + + bnxt_disable_int(bp); + for (i = 0; i < bp->cp_nr_rings; i++) + synchronize_irq(bp->irq_tbl[i].vector); +} + +static void bnxt_enable_int(struct bnxt *bp) +{ + int i; + + atomic_set(&bp->intr_sem, 0); + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + + BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons); + } +} + void bnxt_hwrm_cmd_hdr_init(struct bnxt *bp, void *request, u16 req_type, u16 cmpl_ring, u16 target_id) { @@@ -3318,26 -3312,10 +3318,26 @@@ static int bnxt_hwrm_cfa_ntuple_filter_ req.ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4; req.ip_protocol = keys->basic.ip_proto;
- req.src_ipaddr[0] = keys->addrs.v4addrs.src; - req.src_ipaddr_mask[0] = cpu_to_be32(0xffffffff); - req.dst_ipaddr[0] = keys->addrs.v4addrs.dst; - req.dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff); + if (keys->basic.n_proto == htons(ETH_P_IPV6)) { + int i; + + req.ethertype = htons(ETH_P_IPV6); + req.ip_addr_type = + CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6; + *(struct in6_addr *)&req.src_ipaddr[0] = + keys->addrs.v6addrs.src; + *(struct in6_addr *)&req.dst_ipaddr[0] = + keys->addrs.v6addrs.dst; + for (i = 0; i < 4; i++) { + req.src_ipaddr_mask[i] = cpu_to_be32(0xffffffff); + req.dst_ipaddr_mask[i] = cpu_to_be32(0xffffffff); + } + } else { + req.src_ipaddr[0] = keys->addrs.v4addrs.src; + req.src_ipaddr_mask[0] = cpu_to_be32(0xffffffff); + req.dst_ipaddr[0] = keys->addrs.v4addrs.dst; + req.dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff); + }
req.src_port = keys->ports.src; req.src_port_mask = cpu_to_be16(0xffff); @@@ -3584,12 -3562,6 +3584,12 @@@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]); req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE | VNIC_CFG_REQ_ENABLES_MRU); + } else if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) { + req.rss_rule = + cpu_to_le16(bp->vnic_info[0].fw_rss_cos_lb_ctx[0]); + req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE | + VNIC_CFG_REQ_ENABLES_MRU); + req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE); } else { req.rss_rule = cpu_to_le16(0xffff); } @@@ -3693,27 -3665,6 +3693,27 @@@ static int bnxt_hwrm_vnic_alloc(struct return rc; }
+static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) +{ + struct hwrm_vnic_qcaps_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_vnic_qcaps_input req = {0}; + int rc; + + if (bp->hwrm_spec_code < 0x10600) + return 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_QCAPS, -1, -1); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + if (resp->flags & + cpu_to_le32(VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP)) + bp->flags |= BNXT_FLAG_NEW_RSS_CAP; + } + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp) { u16 i; @@@ -3860,30 -3811,6 +3860,30 @@@ static int hwrm_ring_alloc_send_msg(str return rc; }
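bnxt_hwrm_vnic_qcaps() above follows the driver's usual HWRM query shape: responses land in the single shared bp->hwrm_cmd_resp_addr buffer, so bp->hwrm_cmd_lock must stay held from _hwrm_send_message() until the response fields have been read out (hwrm_send_message() without the underscore takes the lock internally for commands whose response is not inspected). Condensed, with some_field as a stand-in:

	mutex_lock(&bp->hwrm_cmd_lock);
	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
	if (!rc)
		val = le32_to_cpu(resp->some_field);	/* read while locked */
	mutex_unlock(&bp->hwrm_cmd_lock);		/* resp buffer reusable */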
+static int bnxt_hwrm_set_async_event_cr(struct bnxt *bp, int idx) +{ + int rc; + + if (BNXT_PF(bp)) { + struct hwrm_func_cfg_input req = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); + req.fid = cpu_to_le16(0xffff); + req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR); + req.async_event_cr = cpu_to_le16(idx); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + } else { + struct hwrm_func_vf_cfg_input req = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); + req.enables = + cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_ASYNC_EVENT_CR); + req.async_event_cr = cpu_to_le16(idx); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + } + return rc; +} + static int bnxt_hwrm_ring_alloc(struct bnxt *bp) { int i, rc = 0; @@@ -3900,12 -3827,6 +3900,12 @@@ goto err_out; BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id; + + if (!i) { + rc = bnxt_hwrm_set_async_event_cr(bp, ring->fw_ring_id); + if (rc) + netdev_warn(bp->dev, "Failed to set async event completion ring.\n"); + } }
for (i = 0; i < bp->tx_nr_rings; i++) { @@@ -4056,12 -3977,6 +4056,12 @@@ static void bnxt_hwrm_ring_free(struct } }
+ /* The completion rings are about to be freed. After that the + * IRQ doorbell will not work anymore. So we need to disable + * IRQ here. + */ + bnxt_disable_int_sync(bp); + for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; @@@ -4077,50 -3992,6 +4077,50 @@@ } }
+/* Caller must hold bp->hwrm_cmd_lock */ +int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings) +{ + struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_func_qcfg_input req = {0}; + int rc; + + if (bp->hwrm_spec_code < 0x10601) + return 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1); + req.fid = cpu_to_le16(fid); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *tx_rings = le16_to_cpu(resp->alloc_tx_rings); + + return rc; +} + +int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings) +{ + struct hwrm_func_cfg_input req = {0}; + int rc; + + if (bp->hwrm_spec_code < 0x10601) + return 0; + + if (BNXT_VF(bp)) + return 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); + req.fid = cpu_to_le16(0xffff); + req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS); + req.num_tx_rings = cpu_to_le16(*tx_rings); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + return rc; + + mutex_lock(&bp->hwrm_cmd_lock); + rc = __bnxt_hwrm_get_tx_rings(bp, 0xffff, tx_rings); + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static void bnxt_hwrm_set_coal_params(struct bnxt *bp, u32 max_bufs, u32 buf_tmrs, u16 flags, struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) @@@ -4592,12 -4463,8 +4592,12 @@@ static void bnxt_hwrm_resource_free(str
static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) { + struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; int rc;
+ if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) + goto skip_rss_ctx; + /* allocate context for vnic */ rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 0); if (rc) { @@@ -4617,7 -4484,6 +4617,7 @@@ bp->rsscos_nr_ctxs++; }
+skip_rss_ctx: /* configure default vnic, ring grp */ rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); if (rc) { @@@ -4652,17 -4518,13 +4652,17 @@@ static int bnxt_alloc_rfs_vnics(struct int i, rc = 0;
for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_vnic_info *vnic; u16 vnic_id = i + 1; u16 ring_id = i;
if (vnic_id >= bp->nr_vnics) break;
- bp->vnic_info[vnic_id].flags |= BNXT_VNIC_RFS_FLAG; + vnic = &bp->vnic_info[vnic_id]; + vnic->flags |= BNXT_VNIC_RFS_FLAG; + if (bp->flags & BNXT_FLAG_NEW_RSS_CAP) + vnic->flags |= BNXT_VNIC_RFS_NEW_RSS_FLAG; rc = bnxt_hwrm_vnic_alloc(bp, vnic_id, ring_id, 1); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", @@@ -4836,6 -4698,34 +4836,6 @@@ static int bnxt_init_nic(struct bnxt *b return bnxt_init_chip(bp, irq_re_init); }
-static void bnxt_disable_int(struct bnxt *bp) -{ - int i; - - if (!bp->bnapi) - return; - - for (i = 0; i < bp->cp_nr_rings; i++) { - struct bnxt_napi *bnapi = bp->bnapi[i]; - struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; - - BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); - } -} - -static void bnxt_enable_int(struct bnxt *bp) -{ - int i; - - atomic_set(&bp->intr_sem, 0); - for (i = 0; i < bp->cp_nr_rings; i++) { - struct bnxt_napi *bnapi = bp->bnapi[i]; - struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; - - BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons); - } -} - static int bnxt_set_real_num_queues(struct bnxt *bp) { int rc; @@@ -4946,26 -4836,6 +4946,26 @@@ static int bnxt_setup_int_mode(struct b return rc; }
+#ifdef CONFIG_RFS_ACCEL +static unsigned int bnxt_get_max_func_rss_ctxs(struct bnxt *bp) +{ +#if defined(CONFIG_BNXT_SRIOV) + if (BNXT_VF(bp)) + return bp->vf.max_rsscos_ctxs; +#endif + return bp->pf.max_rsscos_ctxs; +} + +static unsigned int bnxt_get_max_func_vnics(struct bnxt *bp) +{ +#if defined(CONFIG_BNXT_SRIOV) + if (BNXT_VF(bp)) + return bp->vf.max_vnics; +#endif + return bp->pf.max_vnics; +} +#endif + unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp) { #if defined(CONFIG_BNXT_SRIOV) @@@ -5224,8 -5094,10 +5224,8 @@@ static void bnxt_disable_napi(struct bn if (!bp->bnapi) return;
- for (i = 0; i < bp->cp_nr_rings; i++) { + for (i = 0; i < bp->cp_nr_rings; i++) napi_disable(&bp->bnapi[i]->napi); - bnxt_disable_poll(bp->bnapi[i]); - } }
static void bnxt_enable_napi(struct bnxt *bp) @@@ -5234,6 -5106,7 +5234,6 @@@
for (i = 0; i < bp->cp_nr_rings; i++) { bp->bnapi[i]->in_reset = false; - bnxt_enable_poll(bp->bnapi[i]); napi_enable(&bp->bnapi[i]->napi); } } @@@ -5441,17 -5314,12 +5441,12 @@@ static int bnxt_update_link(struct bnx if ((link_info->support_auto_speeds | diff) != link_info->support_auto_speeds) { /* An advertised speed is no longer supported, so we need to - * update the advertisement settings. See bnxt_reset() for - * comments about the rtnl_lock() sequence below. + * update the advertisement settings. Caller holds RTNL + * so we can modify link settings. */ - clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_lock(); link_info->advertising = link_info->support_auto_speeds; - if (test_bit(BNXT_STATE_OPEN, &bp->state) && - (link_info->autoneg & BNXT_AUTONEG_SPEED)) + if (link_info->autoneg & BNXT_AUTONEG_SPEED) bnxt_hwrm_set_link_setting(bp, true, false); - set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_unlock(); } return 0; } @@@ -5516,7 -5384,7 +5511,7 @@@ static void bnxt_hwrm_set_link_common(s { u8 autoneg = bp->link_info.autoneg; u16 fw_link_speed = bp->link_info.req_link_speed; - u32 advertising = bp->link_info.advertising; + u16 advertising = bp->link_info.advertising;
if (autoneg & BNXT_AUTONEG_SPEED) { req->auto_mode |= @@@ -5621,45 -5489,6 +5616,45 @@@ static int bnxt_hwrm_shutdown_link(stru return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); }
+static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp) +{ + struct hwrm_port_led_qcaps_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_port_led_qcaps_input req = {0}; + struct bnxt_pf_info *pf = &bp->pf; + int rc; + + if (BNXT_VF(bp) || bp->hwrm_spec_code < 0x10601) + return 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_LED_QCAPS, -1, -1); + req.port_id = cpu_to_le16(pf->port_id); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) { + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; + } + if (resp->num_leds > 0 && resp->num_leds < BNXT_MAX_LED) { + int i; + + bp->num_leds = resp->num_leds; + memcpy(bp->leds, &resp->led0_id, sizeof(bp->leds[0]) * + bp->num_leds); + for (i = 0; i < bp->num_leds; i++) { + struct bnxt_led_info *led = &bp->leds[i]; + __le16 caps = led->led_state_caps; + + if (!led->led_group_id || + !BNXT_LED_ALT_BLINK_CAP(caps)) { + bp->num_leds = 0; + break; + } + } + } + mutex_unlock(&bp->hwrm_cmd_lock); + return 0; +} + static bool bnxt_eee_config_ok(struct bnxt *bp) { struct ethtool_eee *eee = &bp->eee; @@@ -5849,6 -5678,19 +5844,6 @@@ static int bnxt_open(struct net_device return __bnxt_open_nic(bp, true, true); }
-static void bnxt_disable_int_sync(struct bnxt *bp) -{ - int i; - - atomic_inc(&bp->intr_sem); - if (!netif_running(bp->dev)) - return; - - bnxt_disable_int(bp); - for (i = 0; i < bp->cp_nr_rings; i++) - synchronize_irq(bp->irq_tbl[i].vector); -} - int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@@ -5870,12 -5712,13 +5865,12 @@@ while (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state)) msleep(20);
- /* Flush rings before disabling interrupts */ + /* Flush rings and disable interrupts */ bnxt_shutdown_nic(bp, irq_re_init);
/* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */
bnxt_disable_napi(bp); - bnxt_disable_int_sync(bp); del_timer_sync(&bp->timer); bnxt_free_skbs(bp);
@@@ -5922,14 -5765,16 +5917,14 @@@ static int bnxt_ioctl(struct net_devic return -EOPNOTSUPP; }
-static struct rtnl_link_stats64 * +static void bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u32 i; struct bnxt *bp = netdev_priv(dev);
- memset(stats, 0, sizeof(struct rtnl_link_stats64)); - if (!bp->bnapi) - return stats; + return;
/* TODO check if we need to synchronize with bnxt_close path */ for (i = 0; i < bp->cp_nr_rings; i++) { @@@ -5976,6 -5821,8 +5971,6 @@@ stats->tx_fifo_errors = le64_to_cpu(tx->tx_fifo_underruns); stats->tx_errors = le64_to_cpu(tx->tx_err); } - - return stats; }
static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask) @@@ -6128,36 -5975,20 +6123,36 @@@ skip_uc return rc; }
+/* If the chip and firmware support RFS */ +static bool bnxt_rfs_supported(struct bnxt *bp) +{ + if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) + return true; + if (bp->flags & BNXT_FLAG_NEW_RSS_CAP) + return true; + return false; +} + +/* If runtime conditions support RFS */ static bool bnxt_rfs_capable(struct bnxt *bp) { #ifdef CONFIG_RFS_ACCEL - struct bnxt_pf_info *pf = &bp->pf; - int vnics; + int vnics, max_vnics, max_rss_ctxs;
if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_MSIX_CAP)) return false;
vnics = 1 + bp->rx_nr_rings; - if (vnics > pf->max_rsscos_ctxs || vnics > pf->max_vnics) { + max_vnics = bnxt_get_max_func_vnics(bp); + max_rss_ctxs = bnxt_get_max_func_rss_ctxs(bp); + + /* RSS contexts not a limiting factor */ + if (bp->flags & BNXT_FLAG_NEW_RSS_CAP) + max_rss_ctxs = max_vnics; + if (vnics > max_vnics || vnics > max_rss_ctxs) { netdev_warn(bp->dev, "Not enough resources to support NTUPLE filters, enough resources for up to %d rx rings\n", - min(pf->max_rsscos_ctxs - 1, pf->max_vnics - 1)); + min(max_rss_ctxs - 1, max_vnics - 1)); return false; }
@@@ -6213,9 -6044,6 +6208,9 @@@ static int bnxt_set_features(struct net if (features & NETIF_F_LRO) flags |= BNXT_FLAG_LRO;
+ if (bp->flags & BNXT_FLAG_NO_AGG_RINGS) + flags &= ~BNXT_FLAG_TPA; + if (features & NETIF_F_HW_VLAN_CTAG_RX) flags |= BNXT_FLAG_STRIP_VLAN;
@@@ -6367,29 -6195,37 +6362,37 @@@ bnxt_restart_timer mod_timer(&bp->timer, jiffies + bp->current_interval); }
- /* Only called from bnxt_sp_task() */ - static void bnxt_reset(struct bnxt *bp, bool silent) + static void bnxt_rtnl_lock_sp(struct bnxt *bp) { - /* bnxt_reset_task() calls bnxt_close_nic() which waits - * for BNXT_STATE_IN_SP_TASK to clear. - * If there is a parallel dev_close(), bnxt_close() may be holding + /* We are called from bnxt_sp_task which has BNXT_STATE_IN_SP_TASK + * set. If the device is being closed, bnxt_close() may be holding * rtnl() and waiting for BNXT_STATE_IN_SP_TASK to clear. So we * must clear BNXT_STATE_IN_SP_TASK before holding rtnl(). */ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); rtnl_lock(); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_reset_task(bp, silent); + } + + static void bnxt_rtnl_unlock_sp(struct bnxt *bp) + { set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); rtnl_unlock(); }
+ /* Only called from bnxt_sp_task() */ + static void bnxt_reset(struct bnxt *bp, bool silent) + { + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) + bnxt_reset_task(bp, silent); + bnxt_rtnl_unlock_sp(bp); + } + static void bnxt_cfg_ntp_filters(struct bnxt *);
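bnxt_rtnl_lock_sp()/bnxt_rtnl_unlock_sp() above encode the deadlock-avoidance rule spelled out in the comment: bnxt_close() holds rtnl while waiting for BNXT_STATE_IN_SP_TASK to clear, so the sp task must drop that bit before taking rtnl and re-set it before releasing. Any rtnl-needing work in bnxt_sp_task() therefore reduces to this skeleton (do_rtnl_work is a placeholder):

	clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
	rtnl_lock();
	if (test_bit(BNXT_STATE_OPEN, &bp->state))
		do_rtnl_work(bp);		/* hypothetical */
	set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
	rtnl_unlock();

which is also why, per the comment in the sp_task hunk below, such work items must run last: the moment BNXT_STATE_IN_SP_TASK is dropped, a parallel close can proceed.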
static void bnxt_sp_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, sp_task); - int rc;
set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); smp_mb__after_atomic(); @@@ -6403,16 -6239,6 +6406,6 @@@
if (test_and_clear_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event)) bnxt_cfg_ntp_filters(bp); - if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) { - if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, - &bp->sp_event)) - bnxt_hwrm_phy_qcaps(bp); - - rc = bnxt_update_link(bp, true); - if (rc) - netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", - rc); - } if (test_and_clear_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event)) bnxt_hwrm_exec_fwd_req(bp); if (test_and_clear_bit(BNXT_VXLAN_ADD_PORT_SP_EVENT, &bp->sp_event)) { @@@ -6433,18 -6259,39 +6426,39 @@@ bnxt_hwrm_tunnel_dst_port_free( bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE); } + if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) + bnxt_hwrm_port_qstats(bp); + + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They + * must be the last functions to be called before exiting. + */ + if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) { + int rc = 0; + + if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, + &bp->sp_event)) + bnxt_hwrm_phy_qcaps(bp); + + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) + rc = bnxt_update_link(bp, true); + bnxt_rtnl_unlock_sp(bp); + if (rc) + netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", + rc); + } + if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) { + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) + bnxt_get_port_module_status(bp); + bnxt_rtnl_unlock_sp(bp); + } if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, false);
if (test_and_clear_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, true);
- if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) - bnxt_get_port_module_status(bp); - - if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) - bnxt_hwrm_port_qstats(bp); - smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); } @@@ -6625,16 -6472,10 +6639,16 @@@ int bnxt_setup_mq_tc(struct net_device sh = true;
if (tc) { - int max_rx_rings, max_tx_rings, rc; + int max_rx_rings, max_tx_rings, req_tx_rings, rsv_tx_rings, rc;
+ req_tx_rings = bp->tx_nr_rings_per_tc * tc; rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh); - if (rc || bp->tx_nr_rings_per_tc * tc > max_tx_rings) + if (rc || req_tx_rings > max_tx_rings) + return -ENOMEM; + + rsv_tx_rings = req_tx_rings; + if (bnxt_hwrm_reserve_tx_rings(bp, &rsv_tx_rings) || + rsv_tx_rings < req_tx_rings) return -ENOMEM; }
@@@ -6726,18 -6567,12 +6740,18 @@@ static int bnxt_rx_flow_steer(struct ne goto err_free; }
- if ((fkeys->basic.n_proto != htons(ETH_P_IP)) || + if ((fkeys->basic.n_proto != htons(ETH_P_IP) && + fkeys->basic.n_proto != htons(ETH_P_IPV6)) || ((fkeys->basic.ip_proto != IPPROTO_TCP) && (fkeys->basic.ip_proto != IPPROTO_UDP))) { rc = -EPROTONOSUPPORT; goto err_free; } + if (fkeys->basic.n_proto == htons(ETH_P_IPV6) && + bp->hwrm_spec_code < 0x10601) { + rc = -EPROTONOSUPPORT; + goto err_free; + }
memcpy(new_fltr->dst_mac_addr, eth->h_dest, ETH_ALEN); memcpy(new_fltr->src_mac_addr, eth->h_source, ETH_ALEN); @@@ -6944,6 -6779,9 +6958,6 @@@ static const struct net_device_ops bnxt #endif .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = bnxt_busy_poll, -#endif };
static void bnxt_remove_one(struct pci_dev *pdev) @@@ -7082,17 -6920,8 +7096,17 @@@ static int bnxt_get_dflt_rings(struct b int rc;
rc = bnxt_get_max_rings(bp, max_rx, max_tx, shared); - if (rc) - return rc; + if (rc && (bp->flags & BNXT_FLAG_AGG_RINGS)) { + /* Not enough rings, try disabling agg rings. */ + bp->flags &= ~BNXT_FLAG_AGG_RINGS; + rc = bnxt_get_max_rings(bp, max_rx, max_tx, shared); + if (rc) + return rc; + bp->flags |= BNXT_FLAG_NO_AGG_RINGS; + bp->dev->hw_features &= ~NETIF_F_LRO; + bp->dev->features &= ~NETIF_F_LRO; + bnxt_set_ring_params(bp); + }
if (bp->flags & BNXT_FLAG_ROCE_CAP) { int max_cp, max_stat, max_irq; @@@ -7131,11 -6960,6 +7145,11 @@@ static int bnxt_set_dflt_rings(struct b return rc; bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); + + rc = bnxt_hwrm_reserve_tx_rings(bp, &bp->tx_nr_rings_per_tc); + if (rc) + netdev_warn(bp->dev, "Unable to reserve tx rings\n"); + bp->tx_nr_rings = bp->tx_nr_rings_per_tc; bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : bp->tx_nr_rings + bp->rx_nr_rings; @@@ -7283,17 -7107,11 +7297,17 @@@ static int bnxt_init_one(struct pci_de }
bnxt_hwrm_func_qcfg(bp); + bnxt_hwrm_port_led_qcaps(bp);
bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); bnxt_set_max_func_irqs(bp, max_irqs); - bnxt_set_dflt_rings(bp); + rc = bnxt_set_dflt_rings(bp); + if (rc) { + netdev_err(bp->dev, "Not enough rings available.\n"); + rc = -ENOMEM; + goto init_err; + }
/* Default RSS hash cfg. */ bp->rss_hash_cfg = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 | @@@ -7308,8 -7126,7 +7322,8 @@@ VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6; }
- if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) { + bnxt_hwrm_vnic_qcaps(bp); + if (bnxt_rfs_supported(bp)) { dev->hw_features |= NETIF_F_NTUPLE; if (bnxt_rfs_capable(bp)) { bp->flags |= BNXT_FLAG_RFS; diff --combined drivers/net/ethernet/ibm/ibmveth.c index c6ba75c,309f5c6..b618be6 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@@ -729,26 -729,20 +729,26 @@@ static int ibmveth_close(struct net_dev return 0; }
-static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int netdev_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { - cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | + u32 supported, advertising; + + supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE); - cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | + advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | ADVERTISED_FIBRE); - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_FIBRE; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_ENABLE; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 1; + cmd->base.speed = SPEED_1000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_FIBRE; + cmd->base.phy_address = 0; + cmd->base.autoneg = AUTONEG_ENABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; }
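The ibmveth hunk above is the stock migration from the legacy ethtool get_settings callback (struct ethtool_cmd with u32 bitmasks) to get_link_ksettings: scalar settings move into cmd->base, and the old u32 supported/advertising masks are converted into the new, arbitrarily wide link-mode bitmaps via ethtool_convert_legacy_u32_to_link_mode(). For any fixed-link device the pattern looks like (foo_* hypothetical):

	static int foo_get_link_ksettings(struct net_device *dev,
					  struct ethtool_link_ksettings *cmd)
	{
		u32 supported = SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE;

		cmd->base.speed = SPEED_1000;
		cmd->base.duplex = DUPLEX_FULL;
		cmd->base.port = PORT_FIBRE;
		cmd->base.autoneg = AUTONEG_ENABLE;

		/* legacy u32 mask -> link-mode bitmap */
		ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
							supported);
		return 0;
	}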
@@@ -984,11 -978,11 +984,11 @@@ static void ibmveth_get_ethtool_stats(s
static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, - .get_settings = netdev_get_settings, .get_link = ethtool_op_get_link, .get_strings = ibmveth_get_strings, .get_sset_count = ibmveth_get_sset_count, .get_ethtool_stats = ibmveth_get_ethtool_stats, + .get_link_ksettings = netdev_get_link_ksettings, };
static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@@ -1607,8 -1601,11 +1607,11 @@@ static int ibmveth_probe(struct vio_de netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + netdev->hw_features = NETIF_F_SG; + if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM; + }
netdev->features |= netdev->hw_features;
diff --combined drivers/net/ethernet/mediatek/mtk_eth_soc.c index 25ae0c5,1c29c86..9e75768 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@@ -462,8 -462,8 +462,8 @@@ static void mtk_stats_update(struct mtk } }
-static struct rtnl_link_stats64 *mtk_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *storage) +static void mtk_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *storage) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_hw_stats *hw_stats = mac->hw_stats; @@@ -494,6 -494,8 +494,6 @@@ storage->tx_errors = dev->stats.tx_errors; storage->rx_dropped = dev->stats.rx_dropped; storage->tx_dropped = dev->stats.tx_dropped; - - return storage; }
static inline int mtk_max_frag_size(int mtu) @@@ -2515,7 -2517,7 +2515,7 @@@ static int mtk_remove(struct platform_d }
const struct of_device_id of_mtk_match[] = { - { .compatible = "mediatek,mt7623-eth" }, + { .compatible = "mediatek,mt2701-eth" }, {}, }; MODULE_DEVICE_TABLE(of, of_mtk_match); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6c1a5cb,5197817..6236ce9 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -170,8 -170,7 +170,8 @@@ static int mlx5e_get_sset_count(struct case ETH_SS_STATS: return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + - NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + + NUM_PCIE_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@@ -219,14 -218,6 +219,14 @@@ static void mlx5e_fill_stats_strings(st strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format);
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@@ -339,14 -330,6 +339,14 @@@ static void mlx5e_get_ethtool_stats(str data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i);
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], @@@ -552,7 -535,7 +552,7 @@@ static void mlx5e_get_channels(struct n { struct mlx5e_priv *priv = netdev_priv(dev);
- ch->max_combined = mlx5e_get_max_num_channels(priv->mdev); + ch->max_combined = priv->profile->max_nch(priv->mdev); ch->combined_count = priv->params.num_channels; }
@@@ -560,7 -543,6 +560,6 @@@ static int mlx5e_set_channels(struct ne struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - int ncv = priv->profile->max_nch(priv->mdev); unsigned int count = ch->combined_count; bool arfs_enabled; bool was_opened; @@@ -571,16 -553,6 +570,6 @@@ __func__); return -EINVAL; } - if (ch->rx_count || ch->tx_count) { - netdev_info(dev, "%s: separate rx/tx count not supported\n", - __func__); - return -EINVAL; - } - if (count > ncv) { - netdev_info(dev, "%s: count (%d) > max (%d)\n", - __func__, count, ncv); - return -EINVAL; - }
if (priv->params.num_channels == count) return 0; @@@ -1476,6 -1448,8 +1465,6 @@@ static int set_pflag_rx_cqe_compress(st { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - int err = 0; - bool reset;
if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -ENOTSUPP; @@@ -1485,10 -1459,17 +1474,10 @@@ return -EINVAL; }
- reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (reset) - mlx5e_close_locked(netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable); + mlx5e_modify_rx_cqe_compression_locked(priv, enable); priv->params.rx_cqe_compress_def = enable;
- if (reset) - err = mlx5e_open_locked(netdev); - return err; + return 0; }
static int mlx5e_handle_pflag(struct net_device *netdev, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3d2e1a1,06d5e6f..fd8dff6 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@@ -33,7 -33,6 +33,7 @@@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/bpf_trace.h> #include <net/busy_poll.h> #include "en.h" #include "en_tc.h" @@@ -156,15 -155,17 +156,15 @@@ static inline u32 mlx5e_decompress_cqes return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; }
-void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) { bool was_opened;
if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) return;
- mutex_lock(&priv->state_lock); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - goto unlock; + return;
was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) @@@ -175,6 -176,8 +175,6 @@@ if (was_opened) mlx5e_open_locked(priv->netdev);
-unlock: - mutex_unlock(&priv->state_lock); }
#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) @@@ -190,6 -193,9 +190,9 @@@ static inline bool mlx5e_rx_cache_put(s return false; }
+ if (unlikely(page_is_pfmemalloc(dma_info->page))) + return false; + cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; return true; @@@ -261,7 -267,7 +264,7 @@@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq if (unlikely(mlx5e_page_alloc_mapped(rq, di))) return -ENOMEM;
- wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom); return 0; }
@@@ -641,9 -647,10 +644,9 @@@ static inline void mlx5e_xmit_xdp_doorb mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); }
-static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, +static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - unsigned int data_offset, - int len) + const struct xdp_buff *xdp) { struct mlx5e_sq *sq = &rq->channel->xdp_sq; struct mlx5_wq_cyc *wq = &sq->wq; @@@ -655,16 -662,9 +658,16 @@@ struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg;
+ ptrdiff_t data_offset = xdp->data - xdp->data_hard_start; dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; - unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; - void *data = page_address(di->page) + data_offset; + unsigned int dma_len = xdp->data_end - xdp->data; + + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || + MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return false; + }
if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { if (sq->db.xdp.doorbell) { @@@ -674,17 -674,16 +677,17 @@@ } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); - return; + return false; }
+ dma_len -= MLX5E_XDP_MIN_INLINE; dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE);
memset(wqe, 0, sizeof(*wqe));
/* copy the inline part */ - memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr_start, xdp->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); @@@ -704,39 -703,32 +707,39 @@@
sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; + return true; }
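The mlx5e_xdp_handle() rewrite just below now fills in xdp.data_hard_start, which lets an XDP program move the packet start with bpf_xdp_adjust_head(): on XDP_PASS the driver reads the (possibly adjusted) headroom and length back out of the xdp_buff, and failed XDP_TX transmits plus aborted/unknown verdicts are reported through the new trace_xdp_exception tracepoint. The verdict handling, reduced to a driver-neutral sketch (foo_* names hypothetical):

	static bool foo_xdp_handle(struct foo_rq *rq, const struct bpf_prog *prog,
				   void *va, u16 *headroom, u32 *len)
	{
		struct xdp_buff xdp;
		u32 act;

		xdp.data_hard_start = va;
		xdp.data = va + *headroom;
		xdp.data_end = xdp.data + *len;

		act = bpf_prog_run_xdp(prog, &xdp);
		switch (act) {
		case XDP_PASS:
			/* program may have moved data via bpf_xdp_adjust_head() */
			*headroom = xdp.data - xdp.data_hard_start;
			*len = xdp.data_end - xdp.data;
			return false;			/* hand to the stack */
		case XDP_TX:
			if (!foo_xmit_xdp(rq, &xdp))	/* hypothetical */
				trace_xdp_exception(rq->netdev, prog, act);
			return true;
		default:
			bpf_warn_invalid_xdp_action(act);
			/* fall through */
		case XDP_ABORTED:
			trace_xdp_exception(rq->netdev, prog, act);
			/* fall through */
		case XDP_DROP:
			foo_recycle(rq);		/* hypothetical */
			return true;
		}
	}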
/* returns true if packet was consumed by xdp */ -static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, - const struct bpf_prog *prog, - struct mlx5e_dma_info *di, - void *data, u16 len) +static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + void *va, u16 *rx_headroom, u32 *len) { + const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; u32 act;
if (!prog) return false;
- xdp.data = data; - xdp.data_end = xdp.data + len; + xdp.data = va + *rx_headroom; + xdp.data_end = xdp.data + *len; + xdp.data_hard_start = va; + act = bpf_prog_run_xdp(prog, &xdp); switch (act) { case XDP_PASS: + *rx_headroom = xdp.data - xdp.data_hard_start; + *len = xdp.data_end - xdp.data; return false; case XDP_TX: - mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp))) + trace_xdp_exception(rq->netdev, prog, act); return true; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(rq->netdev, prog, act); case XDP_DROP: rq->stats.xdp_drop++; mlx5e_page_release(rq, di, true); @@@ -751,16 -743,15 +754,16 @@@ struct sk_buff *skb_from_cqe(struct mlx struct mlx5e_dma_info *di; struct sk_buff *skb; void *va, *data; + u16 rx_headroom = rq->rx_headroom; bool consumed;
di = &rq->dma_info[wqe_counter]; va = page_address(di->page); - data = va + MLX5_RX_HEADROOM; + data = va + rx_headroom;
dma_sync_single_range_for_cpu(rq->pdev, di->addr, - MLX5_RX_HEADROOM, + rx_headroom, rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(data); @@@ -772,7 -763,8 +775,7 @@@ }
rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, READ_ONCE(rq->xdp_prog), di, data, - cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@@ -788,7 -780,7 +791,7 @@@ page_ref_inc(di->page); mlx5e_page_release(rq, di, true);
- skb_reserve(skb, MLX5_RX_HEADROOM); + skb_reserve(skb, rx_headroom); skb_put(skb, cqe_bcnt);
return skb; diff --combined drivers/net/ethernet/qlogic/qed/qed_ll2.c index 05e32f4,873ce2c..02c5d47 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@@ -1,33 -1,10 +1,33 @@@ /* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation * - * Copyright (c) 2015 QLogic Corporation + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */
#include <linux/types.h> @@@ -320,7 -297,7 +320,7 @@@ static void qed_ll2_txq_flush(struct qe list_del(&p_pkt->list_entry); b_last_packet = list_empty(&p_tx->active_descq); list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); - if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) { struct qed_ooo_buffer *p_buffer;
p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie; @@@ -332,7 -309,7 +332,7 @@@ b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; tx_frag = p_pkt->bds_set[0].tx_frag; - if (p_ll2_conn->gsi_enable) + if (p_ll2_conn->conn.gsi_enable) qed_ll2b_release_tx_gsi_packet(p_hwfn, p_ll2_conn-> my_id, @@@ -401,7 -378,7 +401,7 @@@ static int qed_ll2_txq_completion(struc
spin_unlock_irqrestore(&p_tx->lock, flags); tx_frag = p_pkt->bds_set[0].tx_frag; - if (p_ll2_conn->gsi_enable) + if (p_ll2_conn->conn.gsi_enable) qed_ll2b_complete_tx_gsi_packet(p_hwfn, p_ll2_conn->my_id, p_pkt->cookie, @@@ -573,7 -550,7 +573,7 @@@ static void qed_ll2_rxq_flush(struct qe
list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
- if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) { struct qed_ooo_buffer *p_buffer;
p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie; @@@ -761,7 -738,7 +761,7 @@@ qed_ooo_submit_tx_buffers(struct qed_hw rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1, p_buffer->vlan, bd_flags, l4_hdr_offset_w, - p_ll2_conn->tx_dest, 0, + p_ll2_conn->conn.tx_dest, 0, first_frag, p_buffer->packet_length, p_buffer, true); @@@ -881,7 -858,7 +881,7 @@@ qed_ll2_acquire_connection_ooo(struct q u16 buf_idx; int rc = 0;
- if (p_ll2_info->conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_info->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO) return rc;
if (!rx_num_ooo_buffers) @@@ -924,7 -901,7 +924,7 @@@ static voi qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn) { - if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO) return;
qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); @@@ -936,7 -913,7 +936,7 @@@ static void qed_ll2_release_connection_ { struct qed_ooo_buffer *p_buffer;
- if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO) return;
qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); @@@ -968,23 -945,19 +968,19 @@@ static int qed_ll2_start_ooo(struct qed { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; - struct qed_ll2_info *ll2_info; + struct qed_ll2_conn ll2_info; int rc;
- ll2_info = kzalloc(sizeof(*ll2_info), GFP_KERNEL); - if (!ll2_info) - return -ENOMEM; - ll2_info->conn_type = QED_LL2_TYPE_ISCSI_OOO; - ll2_info->mtu = params->mtu; - ll2_info->rx_drop_ttl0_flg = params->drop_ttl0_packets; - ll2_info->rx_vlan_removal_en = params->rx_vlan_stripping; - ll2_info->tx_tc = OOO_LB_TC; - ll2_info->tx_dest = CORE_TX_DEST_LB; - - rc = qed_ll2_acquire_connection(hwfn, ll2_info, + ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO; + ll2_info.mtu = params->mtu; + ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; + ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; + ll2_info.tx_tc = OOO_LB_TC; + ll2_info.tx_dest = CORE_TX_DEST_LB; + + rc = qed_ll2_acquire_connection(hwfn, &ll2_info, QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, handle); - kfree(ll2_info); if (rc) { DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n"); goto out; @@@ -1029,7 -1002,7 +1025,7 @@@ static int qed_sp_ll2_rx_queue_start(st struct qed_ll2_info *p_ll2_conn, u8 action_on_error) { - enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn.conn_type; struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; struct core_rx_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@@ -1055,7 -1028,7 +1051,7 @@@ p_ramrod->sb_index = p_rx->rx_sb_index; p_ramrod->complete_event_flg = 1;
- p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->conn.mtu); DMA_REGPAIR_LE(p_ramrod->bd_base, p_rx->rxq_chain.p_phys_addr); cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain); @@@ -1063,8 -1036,8 +1059,8 @@@ DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, qed_chain_get_pbl_phys(&p_rx->rcq_chain));
- p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg; - p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en; + p_ramrod->drop_ttl0_flg = p_ll2_conn->conn.rx_drop_ttl0_flg; + p_ramrod->inner_vlan_removal_en = p_ll2_conn->conn.rx_vlan_removal_en; p_ramrod->queue_id = p_ll2_conn->queue_id; p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0 : 1; @@@ -1079,14 -1052,14 +1075,14 @@@ }
p_ramrod->action_on_error.error_type = action_on_error; - p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; + p_ramrod->gsi_offload_flag = p_ll2_conn->conn.gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); }
static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn) { - enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn.conn_type; struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; struct core_tx_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@@ -1098,7 -1071,7 +1094,7 @@@ if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0;
- if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) p_ll2_conn->tx_stats_en = 0; else p_ll2_conn->tx_stats_en = 1; @@@ -1119,7 -1092,7 +1115,7 @@@
p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); p_ramrod->sb_index = p_tx->tx_sb_index; - p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->conn.mtu); p_ramrod->stats_en = p_ll2_conn->tx_stats_en; p_ramrod->stats_id = p_ll2_conn->tx_stats_id;
@@@ -1129,7 -1102,7 +1125,7 @@@ p_ramrod->pbl_size = cpu_to_le16(pbl_size);
memset(&pq_params, 0, sizeof(pq_params)); - pq_params.core.tc = p_ll2_conn->tx_tc; + pq_params.core.tc = p_ll2_conn->conn.tx_tc; pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params); p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
@@@ -1146,7 -1119,7 +1142,7 @@@ DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); }
- p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; + p_ramrod->gsi_offload_flag = p_ll2_conn->conn.gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); }
@@@ -1247,7 -1220,7 +1243,7 @@@ qed_ll2_acquire_connection_rx(struct qe
DP_VERBOSE(p_hwfn, QED_MSG_LL2, "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", - p_ll2_info->conn_type, rx_num_desc); + p_ll2_info->conn.conn_type, rx_num_desc);
out: return rc; @@@ -1285,7 -1258,7 +1281,7 @@@ static int qed_ll2_acquire_connection_t
DP_VERBOSE(p_hwfn, QED_MSG_LL2, "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", - p_ll2_info->conn_type, tx_num_desc); + p_ll2_info->conn.conn_type, tx_num_desc);
out: if (rc) @@@ -1296,7 -1269,7 +1292,7 @@@ }
int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, - struct qed_ll2_info *p_params, + struct qed_ll2_conn *p_params, u16 rx_num_desc, u16 tx_num_desc, u8 *p_connection_handle) @@@ -1325,15 -1298,7 +1321,7 @@@ if (!p_ll2_info) return -EBUSY;
- p_ll2_info->conn_type = p_params->conn_type; - p_ll2_info->mtu = p_params->mtu; - p_ll2_info->rx_drop_ttl0_flg = p_params->rx_drop_ttl0_flg; - p_ll2_info->rx_vlan_removal_en = p_params->rx_vlan_removal_en; - p_ll2_info->tx_tc = p_params->tx_tc; - p_ll2_info->tx_dest = p_params->tx_dest; - p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; - p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; - p_ll2_info->gsi_enable = p_params->gsi_enable; + p_ll2_info->conn = *p_params;
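The hunk above is the heart of this qed_ll2 rework: nine per-field copies collapse into a single struct assignment once the caller-supplied parameters live in their own struct qed_ll2_conn. A minimal sketch of the pattern, with simplified stand-in types rather than the driver's real definitions:

struct conn_params {				/* caller-supplied tunables */
	int conn_type;
	unsigned short mtu;
	unsigned char rx_drop_ttl0_flg;
	/* ... remaining configuration fields ... */
};

struct conn_state {
	struct conn_params conn;		/* embedded configuration */
	unsigned int cid;			/* driver-private runtime state */
	unsigned char queue_id;
};

static void conn_acquire(struct conn_state *state,
			 const struct conn_params *params)
{
	state->conn = *params;	/* one assignment replaces per-field copies */
}

Besides being shorter, the split lets qed_ll2_start_ooo() and qed_roce_ll2_start() pass a small on-stack qed_ll2_conn instead of kzalloc()ing a full qed_ll2_info, as the earlier hunk in this file shows.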
rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); if (rc) @@@ -1394,9 -1359,9 +1382,9 @@@ static int qed_ll2_establish_connection
SET_FIELD(action_on_error, CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, - p_ll2_conn->ai_err_packet_too_big); + p_ll2_conn->conn.ai_err_packet_too_big); SET_FIELD(action_on_error, - CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->ai_err_no_buf); + CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->conn.ai_err_no_buf);
return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); } @@@ -1623,7 -1588,7 +1611,7 @@@ static void qed_ll2_prepare_tx_packet_s "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", p_ll2->queue_id, p_ll2->cid, - p_ll2->conn_type, + p_ll2->conn.conn_type, prod_idx, first_frag_len, num_of_bds, @@@ -1699,7 -1664,7 +1687,7 @@@ static void qed_ll2_tx_packet_notify(st (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", p_ll2_conn->queue_id, - p_ll2_conn->cid, p_ll2_conn->conn_type, db_msg.spq_prod); + p_ll2_conn->cid, p_ll2_conn->conn.conn_type, db_msg.spq_prod); }
int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, @@@ -1840,7 -1805,7 +1828,7 @@@ int qed_ll2_terminate_connection(struc qed_ll2_rxq_flush(p_hwfn, connection_handle); }
- if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
return rc; @@@ -2016,7 -1981,7 +2004,7 @@@ static void qed_ll2_register_cb_ops(str
static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) { - struct qed_ll2_info ll2_info; + struct qed_ll2_conn ll2_info; struct qed_ll2_buffer *buffer, *tmp_buffer; enum qed_ll2_conn_type conn_type; struct qed_ptt *p_ptt; @@@ -2064,6 -2029,7 +2052,7 @@@
/* Prepare the temporary ll2 information */ memset(&ll2_info, 0, sizeof(ll2_info)); + ll2_info.conn_type = conn_type; ll2_info.mtu = params->mtu; ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; @@@ -2143,7 -2109,6 +2132,6 @@@ }
ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); - return 0;
release_terminate_all: diff --combined drivers/net/ethernet/qlogic/qed/qed_ll2.h index c7f2975,3141792..db3e4fc --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@@ -1,33 -1,10 +1,33 @@@ /* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation * - * Copyright (c) 2015 QLogic Corporation + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */
#ifndef _QED_LL2_H @@@ -135,15 -112,8 +135,8 @@@ struct qed_ll2_tx_queue bool b_completing_packet; };
- struct qed_ll2_info { - /* Lock protecting the state of LL2 */ - struct mutex mutex; + struct qed_ll2_conn { enum qed_ll2_conn_type conn_type; - u32 cid; - u8 my_id; - u8 queue_id; - u8 tx_stats_id; - bool b_active; u16 mtu; u8 rx_drop_ttl0_flg; u8 rx_vlan_removal_en; @@@ -151,10 -121,21 +144,21 @@@ enum core_tx_dest tx_dest; enum core_error_handle ai_err_packet_too_big; enum core_error_handle ai_err_no_buf; + u8 gsi_enable; + }; + + struct qed_ll2_info { + /* Lock protecting the state of LL2 */ + struct mutex mutex; + struct qed_ll2_conn conn; + u32 cid; + u8 my_id; + u8 queue_id; + u8 tx_stats_id; + bool b_active; u8 tx_stats_en; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; - u8 gsi_enable; };
/** @@@ -172,7 -153,7 +176,7 @@@ * @return 0 on success, failure otherwise */ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, - struct qed_ll2_info *p_params, + struct qed_ll2_conn *p_params, u16 rx_num_desc, u16 tx_num_desc, u8 *p_connection_handle); diff --combined drivers/net/ethernet/qlogic/qed/qed_roce.c index bd4cad2,2dbdb32..c3c8c50 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@@ -1,5 -1,5 +1,5 @@@ /* QLogic qed NIC Driver - * Copyright (c) 2015-2016 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@@ -2632,7 -2632,7 +2632,7 @@@ static int qed_roce_ll2_start(struct qe { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_roce_ll2_info *roce_ll2; - struct qed_ll2_info ll2_params; + struct qed_ll2_conn ll2_params; int rc;
if (!params) { diff --combined drivers/net/phy/marvell.c index b5b73ff,ed0d235..a3e3733 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@@ -17,10 -17,8 +17,10 @@@ */ #include <linux/kernel.h> #include <linux/string.h> +#include <linux/ctype.h> #include <linux/errno.h> #include <linux/unistd.h> +#include <linux/hwmon.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/delay.h> @@@ -92,17 -90,6 +92,17 @@@ #define MII_88E1121_PHY_MSCR_TX_DELAY BIT(4) #define MII_88E1121_PHY_MSCR_DELAY_MASK (~(0x3 << 4))
+#define MII_88E1121_MISC_TEST 0x1a +#define MII_88E1510_MISC_TEST_TEMP_THRESHOLD_MASK 0x1f00 +#define MII_88E1510_MISC_TEST_TEMP_THRESHOLD_SHIFT 8 +#define MII_88E1510_MISC_TEST_TEMP_IRQ_EN BIT(7) +#define MII_88E1510_MISC_TEST_TEMP_IRQ BIT(6) +#define MII_88E1121_MISC_TEST_TEMP_SENSOR_EN BIT(5) +#define MII_88E1121_MISC_TEST_TEMP_MASK 0x1f + +#define MII_88E1510_TEMP_SENSOR 0x1b +#define MII_88E1510_TEMP_SENSOR_MASK 0xff + #define MII_88E1318S_PHY_MSCR1_REG 16 #define MII_88E1318S_PHY_MSCR1_PAD_ODD BIT(6)
@@@ -185,8 -172,6 +185,8 @@@ static struct marvell_hw_stat marvell_h
struct marvell_priv { u64 stats[ARRAY_SIZE(marvell_hw_stats)]; + char *hwmon_name; + struct device *hwmon_dev; };
static int marvell_ack_interrupt(struct phy_device *phydev) @@@ -1483,371 -1468,6 +1483,371 @@@ static void marvell_get_stats(struct ph data[i] = marvell_get_stat(phydev, i); }
+#ifdef CONFIG_HWMON +static int m88e1121_get_temp(struct phy_device *phydev, long *temp) +{ + int ret; + int val; + + *temp = 0; + + mutex_lock(&phydev->lock); + + ret = phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x6); + if (ret < 0) + goto error; + + /* Enable temperature sensor */ + ret = phy_read(phydev, MII_88E1121_MISC_TEST); + if (ret < 0) + goto error; + + ret = phy_write(phydev, MII_88E1121_MISC_TEST, + ret | MII_88E1121_MISC_TEST_TEMP_SENSOR_EN); + if (ret < 0) + goto error; + + /* Wait for temperature to stabilize */ + usleep_range(10000, 12000); + + val = phy_read(phydev, MII_88E1121_MISC_TEST); + if (val < 0) { + ret = val; + goto error; + } + + /* Disable temperature sensor */ + ret = phy_write(phydev, MII_88E1121_MISC_TEST, + ret & ~MII_88E1121_MISC_TEST_TEMP_SENSOR_EN); + if (ret < 0) + goto error; + + *temp = ((val & MII_88E1121_MISC_TEST_TEMP_MASK) - 5) * 5000; + +error: + phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x0); + mutex_unlock(&phydev->lock); + + return ret; +} + +static int m88e1121_hwmon_read(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, long *temp) +{ + struct phy_device *phydev = dev_get_drvdata(dev); + int err; + + switch (attr) { + case hwmon_temp_input: + err = m88e1121_get_temp(phydev, temp); + break; + default: + return -EOPNOTSUPP; + } + + return err; +} + +static umode_t m88e1121_hwmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_input: + return 0444; + default: + return 0; + } +} + +static u32 m88e1121_hwmon_chip_config[] = { + HWMON_C_REGISTER_TZ, + 0 +}; + +static const struct hwmon_channel_info m88e1121_hwmon_chip = { + .type = hwmon_chip, + .config = m88e1121_hwmon_chip_config, +}; + +static u32 m88e1121_hwmon_temp_config[] = { + HWMON_T_INPUT, + 0 +}; + +static const struct hwmon_channel_info m88e1121_hwmon_temp = { + .type = hwmon_temp, + .config = m88e1121_hwmon_temp_config, +}; + +static const struct hwmon_channel_info *m88e1121_hwmon_info[] = { + &m88e1121_hwmon_chip, + &m88e1121_hwmon_temp, + NULL +}; + +static const struct hwmon_ops m88e1121_hwmon_hwmon_ops = { + .is_visible = m88e1121_hwmon_is_visible, + .read = m88e1121_hwmon_read, +}; + +static const struct hwmon_chip_info m88e1121_hwmon_chip_info = { + .ops = &m88e1121_hwmon_hwmon_ops, + .info = m88e1121_hwmon_info, +}; + +static int m88e1510_get_temp(struct phy_device *phydev, long *temp) +{ + int ret; + + *temp = 0; + + mutex_lock(&phydev->lock); + + ret = phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x6); + if (ret < 0) + goto error; + + ret = phy_read(phydev, MII_88E1510_TEMP_SENSOR); + if (ret < 0) + goto error; + + *temp = ((ret & MII_88E1510_TEMP_SENSOR_MASK) - 25) * 1000; + +error: + phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x0); + mutex_unlock(&phydev->lock); + + return ret; +} + +int m88e1510_get_temp_critical(struct phy_device *phydev, long *temp) +{ + int ret; + + *temp = 0; + + mutex_lock(&phydev->lock); + + ret = phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x6); + if (ret < 0) + goto error; + + ret = phy_read(phydev, MII_88E1121_MISC_TEST); + if (ret < 0) + goto error; + + *temp = (((ret & MII_88E1510_MISC_TEST_TEMP_THRESHOLD_MASK) >> + MII_88E1510_MISC_TEST_TEMP_THRESHOLD_SHIFT) * 5) - 25; + /* convert to mC */ + *temp *= 1000; + +error: + phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x0); + mutex_unlock(&phydev->lock); + + return ret; +} + +int m88e1510_set_temp_critical(struct phy_device *phydev, long 
temp) +{ + int ret; + + mutex_lock(&phydev->lock); + + ret = phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x6); + if (ret < 0) + goto error; + + ret = phy_read(phydev, MII_88E1121_MISC_TEST); + if (ret < 0) + goto error; + + temp = temp / 1000; + temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f); + ret = phy_write(phydev, MII_88E1121_MISC_TEST, + (ret & ~MII_88E1510_MISC_TEST_TEMP_THRESHOLD_MASK) | + (temp << MII_88E1510_MISC_TEST_TEMP_THRESHOLD_SHIFT)); + +error: + phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x0); + mutex_unlock(&phydev->lock); + + return ret; +} + +int m88e1510_get_temp_alarm(struct phy_device *phydev, long *alarm) +{ + int ret; + + *alarm = false; + + mutex_lock(&phydev->lock); + + ret = phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x6); + if (ret < 0) + goto error; + + ret = phy_read(phydev, MII_88E1121_MISC_TEST); + if (ret < 0) + goto error; + *alarm = !!(ret & MII_88E1510_MISC_TEST_TEMP_IRQ); + +error: + phy_write(phydev, MII_M1145_PHY_EXT_ADDR_PAGE, 0x0); + mutex_unlock(&phydev->lock); + + return ret; +} + +static int m88e1510_hwmon_read(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, long *temp) +{ + struct phy_device *phydev = dev_get_drvdata(dev); + int err; + + switch (attr) { + case hwmon_temp_input: + err = m88e1510_get_temp(phydev, temp); + break; + case hwmon_temp_crit: + err = m88e1510_get_temp_critical(phydev, temp); + break; + case hwmon_temp_max_alarm: + err = m88e1510_get_temp_alarm(phydev, temp); + break; + default: + return -EOPNOTSUPP; + } + + return err; +} + +static int m88e1510_hwmon_write(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, long temp) +{ + struct phy_device *phydev = dev_get_drvdata(dev); + int err; + + switch (attr) { + case hwmon_temp_crit: + err = m88e1510_set_temp_critical(phydev, temp); + break; + default: + return -EOPNOTSUPP; + } + return err; +} + +static umode_t m88e1510_hwmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_max_alarm: + return 0444; + case hwmon_temp_crit: + return 0644; + default: + return 0; + } +} + +static u32 m88e1510_hwmon_temp_config[] = { + HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM, + 0 +}; + +static const struct hwmon_channel_info m88e1510_hwmon_temp = { + .type = hwmon_temp, + .config = m88e1510_hwmon_temp_config, +}; + +static const struct hwmon_channel_info *m88e1510_hwmon_info[] = { + &m88e1121_hwmon_chip, + &m88e1510_hwmon_temp, + NULL +}; + +static const struct hwmon_ops m88e1510_hwmon_hwmon_ops = { + .is_visible = m88e1510_hwmon_is_visible, + .read = m88e1510_hwmon_read, + .write = m88e1510_hwmon_write, +}; + +static const struct hwmon_chip_info m88e1510_hwmon_chip_info = { + .ops = &m88e1510_hwmon_hwmon_ops, + .info = m88e1510_hwmon_info, +}; + +static int marvell_hwmon_name(struct phy_device *phydev) +{ + struct marvell_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + const char *devname = dev_name(dev); + size_t len = strlen(devname); + int i, j; + + priv->hwmon_name = devm_kzalloc(dev, len, GFP_KERNEL); + if (!priv->hwmon_name) + return -ENOMEM; + + for (i = j = 0; i < len && devname[i]; i++) { + if (isalnum(devname[i])) + priv->hwmon_name[j++] = devname[i]; + } + + return 0; +} + +static int marvell_hwmon_probe(struct phy_device *phydev, + const struct hwmon_chip_info *chip) +{ + struct marvell_priv *priv = phydev->priv; + struct device *dev = 
&phydev->mdio.dev; + int err; + + err = marvell_hwmon_name(phydev); + if (err) + return err; + + priv->hwmon_dev = devm_hwmon_device_register_with_info( + dev, priv->hwmon_name, phydev, chip, NULL); + + return PTR_ERR_OR_ZERO(priv->hwmon_dev); +} + +static int m88e1121_hwmon_probe(struct phy_device *phydev) +{ + return marvell_hwmon_probe(phydev, &m88e1121_hwmon_chip_info); +} + +static int m88e1510_hwmon_probe(struct phy_device *phydev) +{ + return marvell_hwmon_probe(phydev, &m88e1510_hwmon_chip_info); +} +#else +static int m88e1121_hwmon_probe(struct phy_device *phydev) +{ + return 0; +} + +static int m88e1510_hwmon_probe(struct phy_device *phydev) +{ + return 0; +} +#endif + static int marvell_probe(struct phy_device *phydev) { struct marvell_priv *priv; @@@ -1861,47 -1481,14 +1861,47 @@@ return 0; }
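For reference, the temperature conversions in the hwmon helpers above, worked through with illustrative register values (standalone C, not driver code; the sensor ranges are inferred from the masks and offsets shown):

#include <stdio.h>

int main(void)
{
	int raw_1121 = 0x0f;	/* 88E1121: 5-bit field, 5 degC granularity */
	int raw_1510 = 75;	/* 88E1510: 8-bit field, 1 degC granularity */
	long crit_mc = 80000;	/* requested critical threshold in m degC */

	/* 88E1121: ((raw & 0x1f) - 5) * 5000 -> 50000 m degC = 50 degC */
	printf("88E1121: %d mC\n", ((raw_1121 & 0x1f) - 5) * 5000);

	/* 88E1510: ((raw & 0xff) - 25) * 1000 -> 50000 m degC = 50 degC */
	printf("88E1510: %d mC\n", ((raw_1510 & 0xff) - 25) * 1000);

	/* threshold field: 80 degC / 5 + 5 = 21; decoding gives 21*5-25 = 80 */
	long field = crit_mc / 1000 / 5 + 5;
	printf("crit field %ld -> %ld degC\n", field, field * 5 - 25);
	return 0;
}

The driver itself rounds to the nearest step with DIV_ROUND_CLOSEST() and clamps the field to 0..0x1f, which plain division here elides.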
+static int m88e1121_probe(struct phy_device *phydev) +{ + int err; + + err = marvell_probe(phydev); + if (err) + return err; + + return m88e1121_hwmon_probe(phydev); +} + +static int m88e1510_probe(struct phy_device *phydev) +{ + int err; + + err = marvell_probe(phydev); + if (err) + return err; + + return m88e1510_hwmon_probe(phydev); +} + +static void marvell_remove(struct phy_device *phydev) +{ +#ifdef CONFIG_HWMON + + struct marvell_priv *priv = phydev->priv; + + if (priv && priv->hwmon_dev) + hwmon_device_unregister(priv->hwmon_dev); +#endif +} + static struct phy_driver marvell_drivers[] = { { .phy_id = MARVELL_PHY_ID_88E1101, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1101", .features = PHY_GBIT_FEATURES, - .probe = marvell_probe, .flags = PHY_HAS_INTERRUPT, + .probe = marvell_probe, .config_init = &marvell_config_init, .config_aneg = &marvell_config_aneg, .read_status = &genphy_read_status, @@@ -1973,8 -1560,7 +1973,8 @@@ .name = "Marvell 88E1121R", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .probe = marvell_probe, + .probe = &m88e1121_probe, + .remove = &marvell_remove, .config_init = &m88e1121_config_init, .config_aneg = &m88e1121_config_aneg, .read_status = &marvell_read_status, @@@ -2086,14 -1672,15 +2086,16 @@@ .name = "Marvell 88E1510", .features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE, .flags = PHY_HAS_INTERRUPT, - .probe = marvell_probe, + .probe = &m88e1510_probe, + .remove = &marvell_remove, .config_init = &m88e1510_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .did_interrupt = &m88e1121_did_interrupt, + .get_wol = &m88e1318_get_wol, + .set_wol = &m88e1318_set_wol, .resume = &marvell_resume, .suspend = &marvell_suspend, .get_sset_count = marvell_get_sset_count, @@@ -2106,8 -1693,7 +2108,8 @@@ .name = "Marvell 88E1540", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .probe = marvell_probe, + .probe = m88e1510_probe, + .remove = &marvell_remove, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, diff --combined drivers/net/tun.c index 13890ac,2cd10b2..8a7d6b9 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@@ -218,7 -218,6 +218,7 @@@ struct tun_struct struct list_head disabled; void *security; u32 flow_count; + u32 rx_batched; struct tun_pcpu_stats __percpu *pcpu_stats; };
@@@ -523,7 -522,6 +523,7 @@@ static void tun_queue_purge(struct tun_ while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) kfree_skb(skb);
+ skb_queue_purge(&tfile->sk.sk_write_queue); skb_queue_purge(&tfile->sk.sk_error_queue); }
@@@ -955,7 -953,7 +955,7 @@@ static void tun_set_headroom(struct net tun->align = new_hr; }
-static struct rtnl_link_stats64 * +static void tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0; @@@ -989,6 -987,7 +989,6 @@@ stats->rx_dropped = rx_dropped; stats->rx_frame_errors = rx_frame_errors; stats->tx_dropped = tx_dropped; - return stats; }
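This tun hunk is one instance of a tree-wide interface change that recurs below for virtio_net, the ip_tunnel core and xen-netfront: ndo_get_stats64 now returns void, because every caller already passes in the storage to fill and ignored the returned pointer. The new shape of an implementation, sketched with illustrative names (assumes <linux/netdevice.h>):

static void example_get_stats64(struct net_device *dev,
				struct rtnl_link_stats64 *tot)
{
	tot->rx_packets = dev->stats.rx_packets;	/* fill in place */
	tot->tx_packets = dev->stats.tx_packets;
	/* no 'return tot;' any more - the caller owns 'tot' already */
}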
static const struct net_device_ops tun_netdev_ops = { @@@ -1141,46 -1140,10 +1141,46 @@@ static struct sk_buff *tun_alloc_skb(st return skb; }
+static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, + struct sk_buff *skb, int more) +{ + struct sk_buff_head *queue = &tfile->sk.sk_write_queue; + struct sk_buff_head process_queue; + u32 rx_batched = tun->rx_batched; + bool rcv = false; + + if (!rx_batched || (!more && skb_queue_empty(queue))) { + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); + return; + } + + spin_lock(&queue->lock); + if (!more || skb_queue_len(queue) == rx_batched) { + __skb_queue_head_init(&process_queue); + skb_queue_splice_tail_init(queue, &process_queue); + rcv = true; + } else { + __skb_queue_tail(queue, skb); + } + spin_unlock(&queue->lock); + + if (rcv) { + struct sk_buff *nskb; + + local_bh_disable(); + while ((nskb = __skb_dequeue(&process_queue))) + netif_receive_skb(nskb); + netif_receive_skb(skb); + local_bh_enable(); + } +} + /* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, struct iov_iter *from, - int noblock) + int noblock, bool more) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; @@@ -1321,7 -1284,9 +1321,7 @@@
rxhash = skb_get_hash(skb); #ifndef CONFIG_4KSTACKS - local_bh_disable(); - netif_receive_skb(skb); - local_bh_enable(); + tun_rx_batched(tun, tfile, skb, more); #else netif_rx_ni(skb); #endif @@@ -1347,8 -1312,7 +1347,8 @@@ static ssize_t tun_chr_write_iter(struc if (!tun) return -EBADFD;
- result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK); + result = tun_get_user(tun, tfile, NULL, from, + file->f_flags & O_NONBLOCK, false);
tun_put(tun); return result; @@@ -1396,7 -1360,7 +1396,7 @@@ static ssize_t tun_put_user(struct tun_ return -EINVAL;
if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun))) { + tun_is_little_endian(tun), true)) { struct skb_shared_info *sinfo = skb_shinfo(skb); pr_err("unexpected GSO type: " "0x%x, gso_size %d, hdr_len %d\n", @@@ -1606,8 -1570,7 +1606,8 @@@ static int tun_sendmsg(struct socket *s return -EBADFD;
ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, - m->msg_flags & MSG_DONTWAIT); + m->msg_flags & MSG_DONTWAIT, + m->msg_flags & MSG_MORE); tun_put(tun); return ret; } @@@ -1808,7 -1771,6 +1808,7 @@@ static int tun_set_iff(struct net *net tun->align = NET_SKB_PAD; tun->filter_attached = false; tun->sndbuf = tfile->socket.sk->sk_sndbuf; + tun->rx_batched = 0;
tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); if (!tun->pcpu_stats) { @@@ -2477,29 -2439,6 +2477,29 @@@ static void tun_set_msglevel(struct net #endif }
+static int tun_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct tun_struct *tun = netdev_priv(dev); + + ec->rx_max_coalesced_frames = tun->rx_batched; + + return 0; +} + +static int tun_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct tun_struct *tun = netdev_priv(dev); + + if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT) + tun->rx_batched = NAPI_POLL_WEIGHT; + else + tun->rx_batched = ec->rx_max_coalesced_frames; + + return 0; +} + static const struct ethtool_ops tun_ethtool_ops = { .get_settings = tun_get_settings, .get_drvinfo = tun_get_drvinfo, @@@ -2507,8 -2446,6 +2507,8 @@@ .set_msglevel = tun_set_msglevel, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, + .get_coalesce = tun_get_coalesce, + .set_coalesce = tun_set_coalesce, };
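Taken together with tun_rx_batched() above, these ethtool hooks expose the new batching knob: tun->rx_batched is written from rx_max_coalesced_frames (capped at NAPI_POLL_WEIGHT) and packets queue on sk_write_queue while the sender signals MSG_MORE. A toy model of the flush rule, with the real locking and queue splicing elided:

/* Flush when batching is off, the sender has no more data pending,
 * or the backlog has reached the configured batch size.
 */
static int should_flush(unsigned int rx_batched, unsigned int queued,
			int more)
{
	if (!rx_batched)
		return 1;
	if (!more)
		return 1;
	return queued == rx_batched;
}

From userspace the limit should correspond to the standard coalescing interface, e.g. `ethtool -C tun0 rx-frames 32`.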
static int tun_queue_resize(struct tun_struct *tun) diff --combined drivers/net/usb/cdc_ether.c index 620ba8e,86144f9..f5552aa --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@@ -466,7 -466,7 +466,7 @@@ static int usbnet_cdc_zte_rx_fixup(stru * connected. This causes the link state to be incorrect. Work around this by * always setting the state to off, then on. */ -void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb) +static void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event;
@@@ -531,6 -531,7 +531,7 @@@ static const struct driver_info wwan_in #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 + #define HP_VENDOR_ID 0x03f0
static const struct usb_device_id products[] = { /* BLACKLIST !! @@@ -677,6 -678,13 +678,13 @@@ .driver_info = 0, },
+ /* HP lt2523 (Novatel E371) - handled by qmi_wwan */ + { + USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, + }, + /* AnyDATA ADU960S - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, diff --combined drivers/net/usb/r8152.c index d59d773,ad42295..986243c --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@@ -32,7 -32,7 +32,7 @@@ #define NETNEXT_VERSION "08"
/* Information for net */ - #define NET_VERSION "6" + #define NET_VERSION "8"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers nic_swsd@realtek.com" @@@ -1936,6 -1936,9 +1936,9 @@@ static int r8152_poll(struct napi_struc napi_complete(napi); if (!list_empty(&tp->rx_done)) napi_schedule(napi); + else if (!skb_queue_empty(&tp->tx_queue) && + !list_empty(&tp->tx_free)) + napi_schedule(napi); }
return work_done; @@@ -3155,10 -3158,13 +3158,13 @@@ static void set_carrier(struct r8152 *t if (!netif_carrier_ok(netdev)) { tp->rtl_ops.enable(tp); set_bit(RTL8152_SET_RX_MODE, &tp->flags); + netif_stop_queue(netdev); napi_disable(&tp->napi); netif_carrier_on(netdev); rtl_start_rx(tp); napi_enable(&tp->napi); + netif_wake_queue(netdev); + netif_info(tp, link, netdev, "carrier on\n"); } } else { if (netif_carrier_ok(netdev)) { @@@ -3166,6 -3172,7 +3172,7 @@@ napi_disable(&tp->napi); tp->rtl_ops.disable(tp); napi_enable(&tp->napi); + netif_info(tp, link, netdev, "carrier off\n"); } } } @@@ -3515,12 -3522,12 +3522,12 @@@ static int rtl8152_pre_reset(struct usb if (!netif_running(netdev)) return 0;
+ netif_stop_queue(netdev); napi_disable(&tp->napi); clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); if (netif_carrier_ok(netdev)) { - netif_stop_queue(netdev); mutex_lock(&tp->control); tp->rtl_ops.disable(tp); mutex_unlock(&tp->control); @@@ -3545,12 -3552,17 +3552,17 @@@ static int rtl8152_post_reset(struct us if (netif_carrier_ok(netdev)) { mutex_lock(&tp->control); tp->rtl_ops.enable(tp); + rtl_start_rx(tp); rtl8152_set_rx_mode(netdev); mutex_unlock(&tp->control); - netif_wake_queue(netdev); }
napi_enable(&tp->napi); + netif_wake_queue(netdev); + usb_submit_urb(tp->intr_urb, GFP_KERNEL); + + if (!list_empty(&tp->rx_done)) + napi_schedule(&tp->napi);
return 0; } @@@ -3572,19 -3584,26 +3584,26 @@@ static bool delay_autosuspend(struct r8 */ if (!sw_linking && tp->rtl_ops.in_nway(tp)) return true; + else if (!skb_queue_empty(&tp->tx_queue)) + return true; else return false; }
-static int rtl8152_rumtime_suspend(struct r8152 *tp) +static int rtl8152_runtime_suspend(struct r8152 *tp) { struct net_device *netdev = tp->netdev; int ret = 0;
+ set_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { u32 rcr = 0;
if (delay_autosuspend(tp)) { + clear_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); ret = -EBUSY; goto out1; } @@@ -3601,6 -3620,8 +3620,8 @@@ if (!(ocp_data & RXFIFO_EMPTY)) { rxdy_gated_en(tp, false); ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + clear_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); ret = -EBUSY; goto out1; } @@@ -3620,8 -3641,6 +3641,6 @@@ } }
- set_bit(SELECTIVE_SUSPEND, &tp->flags); - out1: return ret; } @@@ -3653,7 -3672,7 +3672,7 @@@ static int rtl8152_suspend(struct usb_i mutex_lock(&tp->control);
if (PMSG_IS_AUTO(message)) - ret = rtl8152_rumtime_suspend(tp); + ret = rtl8152_runtime_suspend(tp); else ret = rtl8152_system_suspend(tp);
@@@ -3677,12 -3696,15 +3696,15 @@@ static int rtl8152_resume(struct usb_in if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) { if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { tp->rtl_ops.autosuspend_en(tp, false); - clear_bit(SELECTIVE_SUSPEND, &tp->flags); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(tp->netdev)) rtl_start_rx(tp); napi_enable(&tp->napi); + clear_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); + if (!list_empty(&tp->rx_done)) + napi_schedule(&tp->napi); } else { tp->rtl_ops.up(tp); netif_carrier_off(tp->netdev); diff --combined drivers/net/virtio_net.c index f9bf948,765c2d6..bd22cf3 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@@ -23,7 -23,6 +23,7 @@@ #include <linux/virtio.h> #include <linux/virtio_net.h> #include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/scatterlist.h> #include <linux/if_vlan.h> #include <linux/slab.h> @@@ -49,8 -48,16 +49,16 @@@ module_param(gso, bool, 0444) */ DECLARE_EWMA(pkt_len, 1, 64)
+ /* With mergeable buffers we align buffer address and use the low bits to + * encode its true size. Buffer size is up to 1 page so we need to align to + * square root of page size to ensure we reserve enough bits to encode the true + * size. + */ + #define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2) + /* Minimum alignment for mergeable packet buffers. */ - #define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256) + #define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \ + 1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT)
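The minimum-alignment derivation above works out as follows for the common 4 KiB page case; a standalone check (assumes PAGE_SHIFT = 12):

#include <stdio.h>

int main(void)
{
	int page_shift = 12;				/* 4 KiB pages */
	int shift = (page_shift + 1) / 2;		/* = 6 */
	unsigned long align = 1UL << shift;		/* = 64 bytes */

	/* A 64-byte-aligned pointer has 6 zero low bits; storing the
	 * buffer size in 64-byte units in those bits spans
	 * 2^6 * 64 = 4096 bytes, i.e. exactly one page.
	 */
	printf("align=%lu span=%lu\n", align, align << shift);
	return 0;
}

So sqrt(page size) is the smallest alignment that still leaves enough low bits to encode a true size of up to one page; the previous hard-coded 256 appears to have been a safe over-estimate for 4 KiB pages.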
#define VIRTNET_DRIVER_VERSION "1.0.0"
@@@ -331,7 -338,7 +339,7 @@@ static struct sk_buff *page_to_skb(stru return skb; }
-static void virtnet_xdp_xmit(struct virtnet_info *vi, +static bool virtnet_xdp_xmit(struct virtnet_info *vi, struct receive_queue *rq, struct send_queue *sq, struct xdp_buff *xdp, @@@ -383,12 -390,10 +391,12 @@@ put_page(page); } else /* small buffer */ kfree_skb(data); - return; // On error abort to avoid unnecessary kick + /* On error abort to avoid unnecessary kick */ + return false; }
virtqueue_kick(sq->vq); + return true; }
static u32 do_xdp_prog(struct virtnet_info *vi, @@@ -424,14 -429,11 +432,14 @@@ vi->xdp_queue_pairs + smp_processor_id(); xdp.data = buf; - virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, data); + if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, + data))) + trace_xdp_exception(vi->dev, xdp_prog, act); return XDP_TX; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(vi->dev, xdp_prog, act); case XDP_DROP: return XDP_DROP; } @@@ -1110,7 -1112,7 +1118,7 @@@ static int xmit_skb(struct send_queue * hdr = skb_vnet_hdr(skb);
if (virtio_net_hdr_from_skb(skb, &hdr->hdr, - virtio_is_little_endian(vi->vdev))) + virtio_is_little_endian(vi->vdev), false)) BUG();
if (vi->mergeable_rx_bufs) @@@ -1242,9 -1244,10 +1250,9 @@@ static int virtnet_set_mac_address(stru struct sockaddr *addr; struct scatterlist sg;
- addr = kmalloc(sizeof(*addr), GFP_KERNEL); + addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); if (!addr) return -ENOMEM; - memcpy(addr, p, sizeof(*addr));
ret = eth_prepare_mac_addr_change(dev, addr); if (ret) @@@ -1278,8 -1281,8 +1286,8 @@@ out return ret; }
-static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static void virtnet_stats(struct net_device *dev, + struct rtnl_link_stats64 *tot) { struct virtnet_info *vi = netdev_priv(dev); int cpu; @@@ -1312,6 -1315,8 +1320,6 @@@ tot->rx_dropped = dev->stats.rx_dropped; tot->rx_length_errors = dev->stats.rx_length_errors; tot->rx_frame_errors = dev->stats.rx_frame_errors; - - return tot; }
#ifdef CONFIG_NET_POLL_CONTROLLER @@@ -1710,6 -1715,11 +1718,11 @@@ static int virtnet_xdp_set(struct net_d u16 xdp_qp = 0, curr_qp; int i, err;
+ if (prog && prog->xdp_adjust_head) { + netdev_warn(dev, "Does not support bpf_xdp_adjust_head()\n"); + return -EOPNOTSUPP; + } + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || @@@ -1893,8 -1903,12 +1906,12 @@@ static void free_receive_page_frags(str put_page(vi->rq[i].alloc_frag.page); }
- static bool is_xdp_queue(struct virtnet_info *vi, int q) + static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) { + /* For small receive mode always use kfree_skb variants */ + if (!vi->mergeable_rx_bufs) + return false; + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) return false; else if (q < vi->curr_queue_pairs) @@@ -1911,7 -1925,7 +1928,7 @@@ static void free_unused_bufs(struct vir for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_queue(vi, i)) + if (!is_xdp_raw_buffer_queue(vi, i)) dev_kfree_skb(buf); else put_page(virt_to_head_page(buf)); diff --combined drivers/net/vxlan.c index 19b1653,50b62db..2e48ce2 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@@ -1951,7 -1951,7 +1951,7 @@@ static void vxlan_encap_bypass(struct s
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *vxlan, union vxlan_addr *daddr, - __be32 dst_port, __be32 vni, struct dst_entry *dst, + __be16 dst_port, __be32 vni, struct dst_entry *dst, u32 rt_flags) { #if IS_ENABLED(CONFIG_IPV6) @@@ -2268,7 -2268,7 +2268,7 @@@ static void vxlan_cleanup(unsigned lon = container_of(p, struct vxlan_fdb, hlist); unsigned long timeout;
- if (f->state & NUD_PERMANENT) + if (f->state & (NUD_PERMANENT | NUD_NOARP)) continue;
timeout = f->used + vxlan->cfg.age_interval * HZ; @@@ -2354,7 -2354,7 +2354,7 @@@ static int vxlan_open(struct net_devic }
/* Purge the forwarding table */ - static void vxlan_flush(struct vxlan_dev *vxlan) + static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) { unsigned int h;
@@@ -2364,6 -2364,8 +2364,8 @@@ hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { struct vxlan_fdb *f = container_of(p, struct vxlan_fdb, hlist); + if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP))) + continue; /* the all_zeros_mac entry is deleted at vxlan_uninit */ if (!is_zero_ether_addr(f->eth_addr)) vxlan_fdb_destroy(vxlan, f); @@@ -2385,7 -2387,7 +2387,7 @@@ static int vxlan_stop(struct net_devic
del_timer_sync(&vxlan->age_timer);
- vxlan_flush(vxlan); + vxlan_flush(vxlan, false); vxlan_sock_release(vxlan);
return ret; @@@ -2890,7 -2892,7 +2892,7 @@@ static int vxlan_dev_configure(struct n memcpy(&vxlan->cfg, conf, sizeof(*conf)); if (!vxlan->cfg.dst_port) { if (conf->flags & VXLAN_F_GPE) - vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */ + vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ else vxlan->cfg.dst_port = default_port; } @@@ -3058,6 -3060,8 +3060,8 @@@ static void vxlan_dellink(struct net_de struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
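The htons(4790) change above is a genuine endianness fix: cfg.dst_port is a big-endian (__be16) field, so storing the host-order constant put the wrong port on the wire on little-endian hosts. Worked through in standalone C:

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned short host = 4790;		/* 0x12b6, VXLAN-GPE */
	unsigned short wire = htons(host);

	printf("correct port: %u\n", ntohs(wire));	/* 4790 */
	/* On little-endian, the raw host value read as big-endian: */
	printf("buggy port:   %u\n", ntohs(host));	/* 46610 (0xb612) */
	return 0;
}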
+ vxlan_flush(vxlan, true); + spin_lock(&vn->sock_lock); if (!hlist_unhashed(&vxlan->hlist)) hlist_del_rcu(&vxlan->hlist); diff --combined drivers/net/xen-netfront.c index 40f26b6,8315fe7..2c7c29f --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@@ -321,7 -321,7 +321,7 @@@ static void xennet_alloc_rx_buffers(str queue->rx.req_prod_pvt = req_prod;
/* Not enough requests? Try again later. */ - if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) { + if (req_prod - queue->rx.sring->req_prod < NET_RX_SLOTS_MIN) { mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10)); return; } @@@ -1073,8 -1073,8 +1073,8 @@@ static int xennet_change_mtu(struct net return 0; }
-static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static void xennet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { struct netfront_info *np = netdev_priv(dev); int cpu; @@@ -1105,6 -1105,8 +1105,6 @@@
tot->rx_errors = dev->stats.rx_errors; tot->tx_dropped = dev->stats.tx_dropped; - - return tot; }
static void xennet_release_tx_bufs(struct netfront_queue *queue) diff --combined include/linux/bpf.h index 5d417ea,3ed1f3b..57d60dc --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -69,14 -69,14 +69,14 @@@ enum bpf_arg_type /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack */ - ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ - ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not - * need to be initialized, helper function must fill - * all bytes or clear them in error case. + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, + * helper function must fill all bytes or clear + * them in error case. */
- ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ - ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ + ARG_CONST_SIZE, /* number of bytes accessed from memory */ + ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ @@@ -161,10 -161,9 +161,10 @@@ struct bpf_verifier_ops enum bpf_reg_type *reg_type); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); - u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, - struct bpf_insn *insn, struct bpf_prog *prog); + u32 (*convert_ctx_access)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog); };
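The renames above generalize the verifier's argument model: helpers that previously accepted only stack pointers (ARG_PTR_TO_STACK) can now take any verified memory region, with a following ARG_CONST_SIZE argument bounding the access. A sketch of a helper prototype under the new names; the helper itself is hypothetical:

static const struct bpf_func_proto bpf_example_cmp_proto = {
	.func		= bpf_example_cmp,	/* hypothetical helper body */
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,	/* stack, packet or map value */
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,	/* verifier-bounded length */
};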
struct bpf_prog_type_list { @@@ -248,6 -247,8 +248,8 @@@ struct bpf_map * __must_check bpf_map_i void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); + void *bpf_map_area_alloc(size_t size); + void bpf_map_area_free(void *base);
extern int sysctl_unprivileged_bpf_disabled;
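The two declarations just above are the interface for the kmalloc-with-vmalloc-fallback allocator implemented later in this merge (kernel/bpf/syscall.c); map code uses it for large backing arrays so allocation degrades gracefully under memory fragmentation. A hedged usage sketch with an illustrative bucket type:

struct example_bucket {
	struct hlist_head head;		/* illustrative payload */
};

static struct example_bucket *alloc_buckets(size_t n)
{
	/* tries kmalloc for small sizes, falls back to vmalloc */
	return bpf_map_area_alloc(n * sizeof(struct example_bucket));
}

static void free_buckets(struct example_bucket *b)
{
	bpf_map_area_free(b);		/* kvfree() handles either case */
}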
diff --combined include/linux/phy.h index 5c9d252,7fc1105..43474f3 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@@ -25,7 -25,6 +25,6 @@@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/mod_devicetable.h> - #include <linux/phy_led_triggers.h>
#include <linux/atomic.h>
@@@ -158,7 -157,11 +157,7 @@@ static inline const char *phy_modes(phy /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ #define PHY_ID_FMT "%s:%02x"
-/* - * Need to be a little smaller than phydev->dev.bus_id to leave room - * for the ":%02x" - */ -#define MII_BUS_ID_SIZE (20 - 3) +#define MII_BUS_ID_SIZE 61
/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. */ @@@ -628,7 -631,7 +627,7 @@@ struct phy_driver /* A Structure for boards to register fixups with the PHY Lib */ struct phy_fixup { struct list_head list; - char bus_id[20]; + char bus_id[MII_BUS_ID_SIZE + 3]; u32 phy_uid; u32 phy_uid_mask; int (*run)(struct phy_device *phydev); diff --combined kernel/bpf/syscall.c index 05ad086,19b6129..08a4d28 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@@ -10,9 -10,10 +10,11 @@@ * General Public License for more details. */ #include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/syscalls.h> #include <linux/slab.h> + #include <linux/vmalloc.h> + #include <linux/mmzone.h> #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/license.h> @@@ -50,6 -51,30 +52,30 @@@ void bpf_register_map_type(struct bpf_m list_add(&tl->list_node, &bpf_map_types); }
+ void *bpf_map_area_alloc(size_t size) + { + /* We definitely need __GFP_NORETRY, so OOM killer doesn't + * trigger under memory pressure as we really just want to + * fail instead. + */ + const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; + void *area; + + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { + area = kmalloc(size, GFP_USER | flags); + if (area != NULL) + return area; + } + + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, + PAGE_KERNEL); + } + + void bpf_map_area_free(void *area) + { + kvfree(area); + } + int bpf_map_precharge_memlock(u32 pages) { struct user_struct *user = get_current_user(); @@@ -216,7 -241,6 +242,7 @@@ static int map_create(union bpf_attr *a /* failed to allocate fd */ goto free_map;
+ trace_bpf_map_create(map, err); return err;
free_map: @@@ -341,7 -365,6 +367,7 @@@ static int map_lookup_elem(union bpf_at if (copy_to_user(uvalue, value, value_size) != 0) goto free_value;
+ trace_bpf_map_lookup_elem(map, ufd, key, value); err = 0;
free_value: @@@ -424,8 -447,6 +450,8 @@@ static int map_update_elem(union bpf_at __this_cpu_dec(bpf_prog_active); preempt_enable();
+ if (!err) + trace_bpf_map_update_elem(map, ufd, key, value); free_value: kfree(value); free_key: @@@ -471,8 -492,6 +497,8 @@@ static int map_delete_elem(union bpf_at __this_cpu_dec(bpf_prog_active); preempt_enable();
+ if (!err) + trace_bpf_map_delete_elem(map, ufd, key); free_key: kfree(key); err_put: @@@ -525,7 -544,6 +551,7 @@@ static int map_get_next_key(union bpf_a if (copy_to_user(unext_key, next_key, map->key_size) != 0) goto free_next_key;
+ trace_bpf_map_next_key(map, ufd, key, next_key); err = 0;
free_next_key: @@@ -679,10 -697,8 +705,10 @@@ static void __bpf_prog_put_rcu(struct r
void bpf_prog_put(struct bpf_prog *prog) { - if (atomic_dec_and_test(&prog->aux->refcnt)) + if (atomic_dec_and_test(&prog->aux->refcnt)) { + trace_bpf_prog_put_rcu(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + } } EXPORT_SYMBOL_GPL(bpf_prog_put);
@@@ -791,11 -807,7 +817,11 @@@ struct bpf_prog *bpf_prog_get(u32 ufd
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { - return __bpf_prog_get(ufd, &type); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type); + + if (!IS_ERR(prog)) + trace_bpf_prog_get_type(prog); + return prog; } EXPORT_SYMBOL_GPL(bpf_prog_get_type);
@@@ -877,7 -889,6 +903,7 @@@ static int bpf_prog_load(union bpf_att /* failed to allocate fd */ goto free_used_maps;
+ trace_bpf_prog_load(prog, err); return err;
free_used_maps: diff --combined net/batman-adv/fragmentation.c index 42bfbd8,0854ebd..ead18ca --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@@ -1,4 -1,4 +1,4 @@@ -/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Martin Hundebøll martin@hundeboll.net * @@@ -474,7 -474,7 +474,7 @@@ int batadv_frag_send_packet(struct sk_b primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) { ret = -EINVAL; - goto put_primary_if; + goto free_skb; }
/* Create one header to be copied to all fragments */ @@@ -502,7 -502,7 +502,7 @@@ skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) { ret = -ENOMEM; - goto free_skb; + goto put_primary_if; }
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); @@@ -511,7 -511,7 +511,7 @@@ ret = batadv_send_unicast_skb(skb_fragment, neigh_node); if (ret != NET_XMIT_SUCCESS) { ret = NET_XMIT_DROP; - goto free_skb; + goto put_primary_if; }
frag_header.no++; @@@ -519,7 -519,7 +519,7 @@@ /* The initial check in this function should cover this case */ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { ret = -EINVAL; - goto free_skb; + goto put_primary_if; } }
@@@ -527,7 -527,7 +527,7 @@@ if (batadv_skb_head_push(skb, header_size) < 0 || pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { ret = -ENOMEM; - goto free_skb; + goto put_primary_if; }
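The relabelled gotos in this batadv_frag_send_packet() fix restore the usual kernel unwind discipline: once primary_if has been acquired, every later failure must jump to put_primary_if (which falls through to free_skb), and only the failure to acquire it may skip straight to free_skb. The idiom in the abstract, with hypothetical acquire/release steps:

int do_work(void)
{
	int ret = 0;

	/* acquire_a/acquire_b/use_both/release_* are hypothetical */
	if (!acquire_a())		/* nothing held yet */
		return -EINVAL;
	if (!acquire_b()) {
		ret = -ENOMEM;
		goto put_a;		/* release only what we hold */
	}
	if (!use_both())
		ret = -EIO;

	release_b();
put_a:
	release_a();
	return ret;
}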
memcpy(skb->data, &frag_header, header_size); diff --combined net/bridge/br_netlink.c index 6c087cd,7109b38..1ca2549 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@@ -123,7 -123,6 +123,7 @@@ static inline size_t br_port_info_size( + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_MCAST_TO_UCAST */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ @@@ -174,8 -173,6 +174,8 @@@ static int br_port_fill_attrs(struct sk !!(p->flags & BR_ROOT_BLOCK)) || nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_MCAST_TO_UCAST, + !!(p->flags & BR_MULTICAST_TO_UNICAST)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || @@@ -589,7 -586,6 +589,7 @@@ static const struct nla_policy br_port_ [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 }, + [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, };
/* Change the state of the port and notify spanning tree */ @@@ -640,7 -636,6 +640,7 @@@ static int br_setport(struct net_bridge br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); + br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
@@@ -786,20 -781,6 +786,6 @@@ static int br_validate(struct nlattr *t return 0; }
- static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) - { - struct net_bridge *br = netdev_priv(dev); - - if (tb[IFLA_ADDRESS]) { - spin_lock_bh(&br->lock); - br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); - spin_unlock_bh(&br->lock); - } - - return register_netdevice(dev); - } - static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], @@@ -1120,6 -1101,25 +1106,25 @@@ static int br_changelink(struct net_dev return 0; }
+ static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) + { + struct net_bridge *br = netdev_priv(dev); + int err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = br_changelink(dev, tb, data); + if (err) + return err; + + return register_netdevice(dev); + } + static size_t br_get_size(const struct net_device *brdev) { return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ diff --combined net/core/dev.c index c8f1f67,7f218e0..be11aba --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -2408,6 -2408,28 +2408,6 @@@ void netif_schedule_queue(struct netdev } EXPORT_SYMBOL(netif_schedule_queue);
-/** - * netif_wake_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Resume individual transmit queue of a device with multiple transmit queues. - */ -void netif_wake_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) { - struct Qdisc *q; - - rcu_read_lock(); - q = rcu_dereference(txq->qdisc); - __netif_schedule(q); - rcu_read_unlock(); - } -} -EXPORT_SYMBOL(netif_wake_subqueue); - void netif_tx_wake_queue(struct netdev_queue *dev_queue) { if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) { @@@ -2773,9 -2795,9 +2773,9 @@@ static netdev_features_t harmonize_feat if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - } else if (illegal_highdma(skb->dev, skb)) { - features &= ~NETIF_F_SG; } + if (illegal_highdma(skb->dev, skb)) + features &= ~NETIF_F_SG;
return features; } @@@ -3131,7 -3153,9 +3131,7 @@@ sch_handle_egress(struct sk_buff *skb, if (!cl) return skb;
- /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set - * earlier by the caller. - */ + /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) { @@@ -3296,7 -3320,7 +3296,7 @@@ static int __dev_queue_xmit(struct sk_b
qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); + skb->tc_at_ingress = 0; # ifdef CONFIG_NET_EGRESS if (static_key_false(&egress_needed)) { skb = sch_handle_egress(skb, &rc, dev); @@@ -3403,11 -3427,7 +3403,11 @@@ EXPORT_SYMBOL(netdev_max_backlog)
int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; -int weight_p __read_mostly = 64; /* old backlog weight */ +int weight_p __read_mostly = 64; /* old backlog weight */ +int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ +int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ +int dev_rx_weight __read_mostly = 64; +int dev_tx_weight __read_mostly = 64;
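The single weight_p knob grows rx/tx variants here; elsewhere in this series the sysctl handlers appear to combine them multiplicatively, roughly as below (an assumption inferred from the defaults shown, not code from this diff):

/* assumed relation; defaults of 64 * 1 keep the historical weight */
dev_rx_weight = weight_p * dev_weight_rx_bias;
dev_tx_weight = weight_p * dev_weight_tx_bias;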
/* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, @@@ -3896,7 -3916,7 +3896,7 @@@ sch_handle_ingress(struct sk_buff *skb }
qdisc_skb_cb(skb)->pkt_len = skb->len; - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); + skb->tc_at_ingress = 1; qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) { @@@ -3961,7 -3981,9 +3961,7 @@@ int netdev_rx_handler_register(struct n rx_handler_func_t *rx_handler, void *rx_handler_data) { - ASSERT_RTNL(); - - if (dev->rx_handler) + if (netdev_is_rx_handler_busy(dev)) return -EBUSY;
/* Note: rx_handler_data must be set before rx_handler */ @@@ -4067,8 -4089,12 +4067,8 @@@ another_round goto out; }
-#ifdef CONFIG_NET_CLS_ACT - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - goto ncls; - } -#endif + if (skb_skip_tc_classify(skb)) + goto skip_classify;
if (pfmemalloc) goto skip_taps; @@@ -4096,8 -4122,10 +4096,8 @@@ skip_taps goto out; } #endif -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -ncls: -#endif + skb_reset_tc(skb); +skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop;
@@@ -4807,7 -4835,7 +4807,7 @@@ static int process_backlog(struct napi_ net_rps_action_and_irq_enable(sd); }
- napi->weight = weight_p; + napi->weight = dev_rx_weight; while (again) { struct sk_buff *skb;
diff --combined net/core/lwt_bpf.c index 40ef8ae,b3eef90..0360045 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@@ -352,7 -352,7 +352,7 @@@ static int bpf_encap_nlsize(struct lwtu 0; }
-int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b) +static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b) { /* FIXME: * The LWT state is currently rebuilt for delete requests which @@@ -386,6 -386,7 +386,7 @@@ static const struct lwtunnel_encap_ops .fill_encap = bpf_fill_encap_info, .get_encap_size = bpf_encap_nlsize, .cmp_encap = bpf_encap_cmp, + .owner = THIS_MODULE, };
static int __init bpf_lwt_init(void) diff --combined net/dccp/ipv6.c index 08bcdc3,c4e879c..cef60a4 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@@ -227,7 -227,7 +227,7 @@@ static int dccp_v6_send_response(const opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } @@@ -281,7 -281,7 +281,7 @@@ static void dccp_v6_ctl_send_reset(cons dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); + ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; @@@ -937,6 -937,7 +937,6 @@@ static const struct inet_connection_soc .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@@ -957,6 -958,7 +957,6 @@@ static const struct inet_connection_soc .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, diff --combined net/dsa/slave.c index b8e5868,7d45961..9750dd6 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@@ -673,6 -673,7 +673,6 @@@ static void dsa_slave_get_drvinfo(struc struct ethtool_drvinfo *drvinfo) { strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, dsa_driver_version, sizeof(drvinfo->version)); strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } @@@ -781,7 -782,7 +781,7 @@@ static void dsa_cpu_port_get_ethtool_st uint64_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->ds[0]; + struct dsa_switch *ds = dst->cpu_switch; s8 cpu_port = dst->cpu_port; int count = 0;
@@@ -798,7 -799,7 +798,7 @@@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->ds[0]; + struct dsa_switch *ds = dst->cpu_switch; int count = 0;
if (dst->master_ethtool_ops.get_sset_count) @@@ -814,7 -815,7 +814,7 @@@ static void dsa_cpu_port_get_strings(st uint32_t stringset, uint8_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->ds[0]; + struct dsa_switch *ds = dst->cpu_switch; s8 cpu_port = dst->cpu_port; int len = ETH_GSTRING_LEN; int mcount = 0, count; @@@ -983,17 -984,6 +983,17 @@@ static void dsa_slave_poll_controller(s } #endif
+static int dsa_slave_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (snprintf(name, len, "p%d", p->port) >= len) + return -EINVAL; + + return 0; +} + void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) { ops->get_sset_count = dsa_cpu_port_get_sset_count; @@@ -1041,7 -1031,6 +1041,7 @@@ static const struct net_device_ops dsa_ .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, };
static const struct switchdev_ops dsa_slave_switchdev_ops = { @@@ -1116,10 -1105,8 +1116,8 @@@ static int dsa_slave_phy_connect(struc /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) p->phy_interface = p->phy->interface; - phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); - - return 0; + return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); }
static int dsa_slave_phy_setup(struct dsa_slave_priv *p, @@@ -1214,6 -1201,8 +1212,8 @@@ int dsa_slave_suspend(struct net_devic { struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ netif_device_detach(slave_dev); + if (p->phy) { phy_stop(p->phy); p->old_pause = -1; diff --combined net/ipv4/ip_tunnel_core.c index 5476110,0fd1976..9d6c100 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@@ -188,8 -188,8 +188,8 @@@ int iptunnel_handle_offloads(struct sk_ EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
/* Often modified stats are per cpu, other are shared (netdev->stats) */ -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +void ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { int i;
@@@ -214,6 -214,8 +214,6 @@@ tot->rx_bytes += rx_bytes; tot->tx_bytes += tx_bytes; } - - return tot; } EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
@@@ -311,6 -313,7 +311,7 @@@ static const struct lwtunnel_encap_ops .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, };
static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { @@@ -401,6 -404,7 +402,7 @@@ static const struct lwtunnel_encap_ops .fill_encap = ip6_tun_fill_encap_info, .get_encap_size = ip6_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, };
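Both .owner = THIS_MODULE additions above let the lwtunnel core pin the module that provides an encap type while encap state is alive, closing a module-unload race. A minimal sketch of what a hypothetical out-of-tree encap type registration now looks like — only the .owner line is the point:

static const struct lwtunnel_encap_ops demo_encap_ops = {
	/* build_state, destroy_state, output etc. elided */
	.fill_encap	= demo_fill_encap_info,	/* hypothetical helpers */
	.get_encap_size	= demo_encap_nlsize,
	.cmp_encap	= demo_cmp_encap,
	.owner		= THIS_MODULE,	/* held while lwtunnel state references us */
};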
void __init ip_tunnel_core_init(void) diff --combined net/ipv4/tcp_fastopen.c index 9674bec,dd2560c..8ea4e97 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@@ -205,6 -205,7 +205,7 @@@ static struct sock *tcp_fastopen_create * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd;
/* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash @@@ -325,57 -326,3 +326,57 @@@ fastopen *foc = valid_foc; return NULL; } + +bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) +{ + unsigned long last_syn_loss = 0; + int syn_loss = 0; + + tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); + + /* Recurring FO SYN losses: no cookie or data in SYN */ + if (syn_loss > 1 && + time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { + cookie->len = -1; + return false; + } + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + cookie->len = -1; + return true; + } + return cookie->len > 0; +} + +/* This function checks if we want to defer sending SYN until the first + * write(). We defer under the following conditions: + * 1. fastopen_connect sockopt is set + * 2. we have a valid cookie + * Return value: return true if we want to defer until application writes data + * return false if we want to send out SYN immediately + */ +bool tcp_fastopen_defer_connect(struct sock *sk, int *err) +{ + struct tcp_fastopen_cookie cookie = { .len = 0 }; + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (tp->fastopen_connect && !tp->fastopen_req) { + if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { + inet_sk(sk)->defer_connect = 1; + return true; + } + + /* Alloc fastopen_req in order for FO option to be included + * in SYN + */ + tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), + sk->sk_allocation); + if (tp->fastopen_req) + tp->fastopen_req->cookie = cookie; + else + *err = -ENOBUFS; + } + return false; +} +EXPORT_SYMBOL(tcp_fastopen_defer_connect); diff --combined net/ipv4/tcp_input.c index 3de6eba,41dcbd5..27c95ac --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@@ -79,7 -79,7 +79,7 @@@ int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; -int sysctl_tcp_fack __read_mostly = 1; +int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; @@@ -95,6 -95,9 +95,6 @@@ int sysctl_tcp_rfc1337 __read_mostly int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; - -int sysctl_tcp_thin_dupack __read_mostly; - int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; @@@ -901,6 -904,8 +901,6 @@@ static void tcp_update_reordering(struc tcp_disable_fack(tp); }
- if (metric > 0) - tcp_disable_early_retrans(tp); tp->rack.reord = 1; }
@@@ -911,6 -916,10 +911,6 @@@ static void tcp_verify_retransmit_hint( before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) tp->retransmit_skb_hint = skb; - - if (!tp->lost_out || - after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; }
/* Sum the number of packets on the wire we have marked as lost. @@@ -1126,7 -1135,6 +1126,7 @@@ struct tcp_sacktag_state */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */ struct rate_sample *rate; int flag; }; @@@ -1209,8 -1217,7 +1209,8 @@@ static u8 tcp_sacktag_one(struct sock * return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) { - tcp_rack_advance(tp, xmit_time, sacked); + tcp_rack_advance(tp, sacked, end_seq, + xmit_time, &state->ack_time);
if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, @@@ -1930,6 -1937,7 +1930,6 @@@ void tcp_enter_loss(struct sock *sk struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sk_buff *skb; - bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ bool mark_lost;
@@@ -1974,6 -1982,7 +1974,6 @@@ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@@ -1989,15 -1998,13 +1989,15 @@@ tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp);
- /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous - * loss recovery is underway except recurring timeout(s) on - * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO + * if a previous recovery is underway, otherwise it may incorrectly + * call a timeout spurious if some previously retransmitted packets + * are s/acked (sec 3.2). We do not apply that restriction since + * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS + * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO + * on PMTU discovery to avoid sending new data. */ - tp->frto = sysctl_tcp_frto && - (new_recovery || icsk->icsk_retransmits) && - !inet_csk(sk)->icsk_mtup.probe_size; + tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size; }
/* If ACK arrived pointing to a remembered SACK, it means that our @@@ -2049,6 -2056,30 +2049,6 @@@ static inline int tcp_dupack_heuristics return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; }
-static bool tcp_pause_early_retransmit(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned long delay; - - /* Delay early retransmit and entering fast recovery for - * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples - * available, or RTO is scheduled to fire first. - */ - if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || - (flag & FLAG_ECE) || !tp->srtt_us) - return false; - - delay = max(usecs_to_jiffies(tp->srtt_us >> 5), - msecs_to_jiffies(2)); - - if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) - return false; - - inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, - TCP_RTO_MAX); - return true; -} - /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- * @@@ -2096,26 -2127,10 +2096,26 @@@ * F.e. after RTO, when all the queue is considered as lost, * lost_out = packets_out and in_flight = retrans_out. * - * Essentially, we have now two algorithms counting + * Essentially, we have now a few algorithms detecting * lost packets. * - * FACK: It is the simplest heuristics. As soon as we decided + * If the receiver supports SACK: + * + * RFC6675/3517: It is the conventional algorithm. A packet is + * considered lost if the number of higher sequence packets + * SACKed is greater than or equal to the DUPACK threshold + * (reordering). This is implemented in tcp_mark_head_lost and + * tcp_update_scoreboard. + * + * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm + * (2017-) that checks timing instead of counting DUPACKs. + * Essentially a packet is considered lost if it's not S/ACKed + * after RTT + reordering_window, where both metrics are + * dynamically measured and adjusted. This is implemented in + * tcp_rack_mark_lost. + * + * FACK (Disabled by default. Subsumed by RACK): + * It is the simplest heuristic. As soon as we decided * that something is lost, we decide that _all_ not SACKed * packets until the most forward SACK are lost. I.e. * lost_out = fackets_out - sacked_out and left_out = fackets_out. @@@ -2124,14 -2139,16 +2124,14 @@@ * takes place. We use FACK by default until reordering * is suspected on the path to this destination. * - * NewReno: when Recovery is entered, we assume that one segment + * If the receiver does not support SACK: + * + * NewReno (RFC6582): in Recovery we assume that one segment * is lost (classic Reno). While we are in Recovery and * a partial ACK arrives, we assume that one more packet * is lost (NewReno). These heuristics are the same in NewReno * and SACK. * - * Imagine, that's all! Forget about all this shamanism about CWND inflation - * deflation etc. CWND is real congestion window, never inflated, changes - * only according to classic VJ rules. - * * The really tricky (and carefully tuned) part of the algorithm * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue(). * The first determines the moment _when_ we should reduce CWND and, @@@ -2159,6 -2176,8 +2159,6 @@@ static bool tcp_time_to_recover(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - __u32 packets_out; - int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
/* Trick#1: The loss is proven. */ if (tp->lost_out) @@@ -2168,6 -2187,39 +2168,6 @@@ if (tcp_dupack_heuristics(tp) > tp->reordering) return true;
- /* Trick#4: It is still not OK... But will it be useful to delay - * recovery more? - */ - packets_out = tp->packets_out; - if (packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) && - !tcp_may_send_now(sk)) { - /* We have nothing to send. This connection is limited - * either by receiver window or by application. - */ - return true; - } - - /* If a thin stream is detected, retransmit after first - * received dupack. Employ only if SACK is supported in order - * to avoid possible corner-case series of spurious retransmissions - * Use only if there are no unsent data. - */ - if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && - tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && - tcp_is_sack(tp) && !tcp_send_head(sk)) - return true; - - /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious - * retransmissions due to small network reorderings, we implement - * Mitigation A.3 in the RFC and delay the retransmission for a short - * interval if appropriate. - */ - if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && - !tcp_may_send_now(sk)) - return !tcp_pause_early_retransmit(sk, flag); - return false; }
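The comment block above describes three detectors; the RACK rule in particular is compact enough to model standalone. A toy sketch for orientation only — the kernel's actual logic lives in tcp_rack_mark_lost(), which is not part of this hunk:

#include <stdbool.h>
#include <stdint.h>

struct pkt {
	uint64_t xmit_us;	/* (re)transmit timestamp of this segment */
	bool delivered;		/* has it been S/ACKed? */
};

/* Toy RACK check: an outstanding segment is deemed lost once some
 * segment sent after it has been delivered, and more than
 * rtt + reordering window has elapsed since this one was sent. */
static bool rack_deems_lost(const struct pkt *p, uint64_t now_us,
			    uint64_t newest_delivered_xmit_us,
			    uint64_t rtt_us, uint64_t reo_wnd_us)
{
	if (p->delivered)
		return false;
	if (newest_delivered_xmit_us <= p->xmit_us)
		return false;	/* nothing sent later has been delivered */
	return now_us > p->xmit_us + rtt_us + reo_wnd_us;
}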
@@@ -2469,7 -2521,8 +2469,7 @@@ static void tcp_init_cwnd_reduction(str tcp_ecn_queue_cwr(tp); }
-static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, - int flag) +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; @@@ -2637,7 -2690,7 +2637,7 @@@ void tcp_simple_retransmit(struct sock } EXPORT_SYMBOL(tcp_simple_retransmit);
-static void tcp_enter_recovery(struct sock *sk, bool ece_ack) +void tcp_enter_recovery(struct sock *sk, bool ece_ack) { struct tcp_sock *tp = tcp_sk(sk); int mib_idx; @@@ -2673,18 -2726,14 +2673,18 @@@ static void tcp_process_loss(struct soc tcp_try_undo_loss(sk, false)) return;
- if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ - /* Step 3.b. A timeout is spurious if not all data are - * lost, i.e., never-retransmitted data are (s)acked. - */ - if ((flag & FLAG_ORIG_SACK_ACKED) && - tcp_try_undo_loss(sk, true)) - return; + /* The ACK (s)acks some never-retransmitted data, meaning not all + * the data packets before the timeout were lost. Therefore we + * undo the congestion window and state. This is essentially + * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since + * a retransmitted skb is permanently marked, we can apply such an + * operation even if F-RTO was not used. + */ + if ((flag & FLAG_ORIG_SACK_ACKED) && + tcp_try_undo_loss(sk, tp->undo_marker)) + return;
+ if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ if (after(tp->snd_nxt, tp->high_seq)) { if (flag & FLAG_DATA_SACKED || is_dupack) tp->frto = 0; /* Step 3.a. loss was real */ @@@ -2751,21 -2800,6 +2751,21 @@@ static bool tcp_try_undo_partial(struc return false; }
+static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag, + const struct skb_mstamp *ack_time) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* Use RACK to detect loss */ + if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { + u32 prior_retrans = tp->retrans_out; + + tcp_rack_mark_lost(sk, ack_time); + if (prior_retrans > tp->retrans_out) + *ack_flag |= FLAG_LOST_RETRANS; + } +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@@ -2779,8 -2813,7 +2779,8 @@@ * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const int acked, - bool is_dupack, int *ack_flag, int *rexmit) + bool is_dupack, int *ack_flag, int *rexmit, + const struct skb_mstamp *ack_time) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@@ -2831,6 -2864,13 +2831,6 @@@ } }
- /* Use RACK to detect loss */ - if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && - tcp_rack_mark_lost(sk)) { - flag |= FLAG_LOST_RETRANS; - *ack_flag |= FLAG_LOST_RETRANS; - } - /* E. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: @@@ -2848,13 -2888,11 +2848,13 @@@ tcp_try_keep_open(sk); return; } + tcp_rack_identify_loss(sk, ack_flag, ack_time); break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack, rexmit); - if (icsk->icsk_ca_state != TCP_CA_Open && - !(flag & FLAG_LOST_RETRANS)) + tcp_rack_identify_loss(sk, ack_flag, ack_time); + if (!(icsk->icsk_ca_state == TCP_CA_Open || + (*ack_flag & FLAG_LOST_RETRANS))) return; /* Change state if cwnd is undone or retransmits are lost */ default: @@@ -2868,7 -2906,6 +2868,7 @@@ if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk);
+ tcp_rack_identify_loss(sk, ack_flag, ack_time); if (!tcp_time_to_recover(sk, flag)) { tcp_try_to_open(sk, flag); return; @@@ -2987,7 -3024,7 +2987,7 @@@ void tcp_rearm_rto(struct sock *sk } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = @@@ -3004,6 -3041,24 +3004,6 @@@ } }
-/* This function is called when the delayed ER timer fires. TCP enters - * fast recovery and performs fast-retransmit. - */ -void tcp_resume_early_retransmit(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_rearm_rto(sk); - - /* Stop if ER is disabled after the delayed ER timer is scheduled */ - if (!tp->do_early_retrans) - return; - - tcp_enter_recovery(sk, false); - tcp_update_scoreboard(sk, 1); - tcp_xmit_retransmit_queue(sk); -} - /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@@ -3046,11 -3101,11 +3046,11 @@@ static void tcp_ack_tstamp(struct sock */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack, - struct skb_mstamp *now) + struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); struct skb_mstamp first_ackt, last_ackt; + struct skb_mstamp *now = &sack->ack_time; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@@ -3110,9 -3165,7 +3110,9 @@@ } else if (tcp_is_sack(tp)) { tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) - tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + tcp_rack_advance(tp, sacked, scb->end_seq, + &skb->skb_mstamp, + &sack->ack_time); } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@@ -3542,6 -3595,7 +3542,6 @@@ static int tcp_ack(struct sock *sk, con u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ - struct skb_mstamp now;
sack_state.first_sackt.v64 = 0; sack_state.rate = &rs; @@@ -3567,9 -3621,10 +3567,9 @@@ if (after(ack, tp->snd_nxt)) goto invalid_ack;
- skb_mstamp_get(&now); + skb_mstamp_get(&sack_state.ack_time);
- if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) + if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk);
if (after(ack, prior_snd_una)) { @@@ -3634,12 -3689,11 +3634,12 @@@
/* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state, &now); + &sack_state);
if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, + &sack_state.ack_time); } if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@@ -3654,17 -3708,15 +3654,17 @@@ tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, &now, &rs); - tcp_cong_control(sk, ack, delivered, flag, &rs); + tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time, + sack_state.rate); + tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1;
no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, + &sack_state.ack_time); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@@ -3685,11 -3737,9 +3685,11 @@@ old_ack * If data was DSACKed, see if we can undo a cwnd reduction. */ if (TCP_SKB_CB(skb)->sacked) { + skb_mstamp_get(&sack_state.ack_time); flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, + &sack_state.ack_time); tcp_xmit_recovery(sk, rexmit); }
@@@ -4507,7 -4557,6 +4507,7 @@@ add_sack end: if (skb) { tcp_grow_window(sk, skb); + skb_condense(skb); skb_set_owner_r(skb, sk); } } @@@ -5029,7 -5078,7 +5029,7 @@@ static void tcp_check_space(struct soc if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ - smp_mb__after_atomic(); + smp_mb(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { tcp_new_space(sk); @@@ -5200,23 -5249,6 +5200,23 @@@ static int tcp_copy_to_iovec(struct soc return err; }
+/* Accept RST for rcv_nxt - 1 after a FIN. + * When TCP connections are abruptly terminated from Mac OS X (via ^C), a + * FIN is sent followed by a RST packet. The RST is sent with the same + * sequence number as the FIN, and thus according to RFC 5961 a challenge + * ACK should be sent. However, Mac OS X rate limits replies to challenge + * ACKs on the closed socket. In addition, middleboxes can drop either the + * challenge ACK or a subsequent RST. + */ +static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) && + (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | + TCPF_CLOSING)); +} + /* Does PAWS and seqno based validation of an incoming segment, flags will * play a significant role here. */
/* Step 2: check RST bit */ if (th->rst) { - /* RFC 5961 3.2 (extend to match against SACK too if available): - * If seq num matches RCV.NXT or the right-most SACK block, + /* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a + * FIN and SACK too if available): + * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or + * the right-most SACK block, * then * RESET the connection * else * Send a challenge ACK */ - if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt || + tcp_reset_check(sk, skb)) { rst_seq_match = true; } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) { struct tcp_sack_block *sp = &tp->selective_acks[0]; @@@ -6336,7 -6363,7 +6336,7 @@@ int tcp_conn_request(struct request_soc * timewait bucket, so that all the necessary checks * are made in the function processing timewait state. */ - if (tcp_death_row.sysctl_tw_recycle) { + if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { bool strict;
dst = af_ops->route_req(sk, &fl, req, &strict); @@@ -6350,8 -6377,8 +6350,8 @@@ } /* Kill the following clause, if you dislike this way. */ else if (!net->ipv4.sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && + (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst, false, tmp_opt.saw_tstamp)) { /* Without syncookies last quarter of diff --combined net/ipv6/addrconf.c index 4c47656,f60e88e..156ed57 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@@ -243,7 -243,6 +243,7 @@@ static struct ipv6_devconf ipv6_devcon .seg6_require_hmac = 0, #endif .enhanced_dad = 1, + .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, };
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@@ -295,7 -294,6 +295,7 @@@ .seg6_require_hmac = 0, #endif .enhanced_dad = 1, + .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, };
/* Check if a valid qdisc is available */ @@@ -388,9 -386,9 +388,9 @@@ static struct inet6_dev *ipv6_add_dev(s memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
if (ndev->cnf.stable_secret.initialized) - ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; else - ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; + ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode;
ndev->cnf.mtu6 = dev->mtu; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@@ -2146,14 -2144,12 +2146,14 @@@ static int ipv6_generate_eui64(u8 *eui case ARPHRD_SIT: return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: + case ARPHRD_TUNNEL: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); case ARPHRD_TUNNEL6: + case ARPHRD_IP6GRE: return addrconf_ifid_ip6tnl(eui, dev); } return -1; @@@ -2391,8 -2387,8 +2391,8 @@@ static void manage_tempaddrs(struct ine
static bool is_addr_mode_generate_stable(struct inet6_dev *idev) { - return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || - idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; + return idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; }
int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, @@@ -3156,7 -3152,7 +3156,7 @@@ static void addrconf_addr_gen(struct in
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- switch (idev->addr_gen_mode) { + switch (idev->cnf.addr_gen_mode) { case IN6_ADDR_GEN_MODE_RANDOM: ipv6_gen_mode_random_init(idev); /* fallthrough */ @@@ -3197,9 -3193,6 +3197,9 @@@ static void addrconf_dev_config(struct (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN) && + (dev->type != ARPHRD_IP6GRE) && + (dev->type != ARPHRD_IPGRE) && + (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration. */ return; @@@ -3211,8 -3204,8 +3211,8 @@@
/* this device type has no EUI support */ if (dev->type == ARPHRD_NONE && - idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) - idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
addrconf_addr_gen(idev, false); } @@@ -4895,13 -4888,6 +4895,13 @@@ static void inet6_ifa_notify(int event struct net *net = dev_net(ifa->idev->dev); int err = -ENOBUFS;
+ /* Don't send a DELADDR notification for a TENTATIVE address, + * since the NEWADDR notification is sent only after the + * TENTATIVE flag is removed. + */ + if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR) + return; + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); if (!skb) goto errout; @@@ -4989,7 -4975,6 +4989,7 @@@ static inline void ipv6_store_devconf(s array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; #endif array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; + array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; }
static inline size_t inet6_ifla6_size(void) @@@ -5101,7 -5086,7 +5101,7 @@@ static int inet6_fill_ifla6_attrs(struc if (!nla) goto nla_put_failure;
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) goto nla_put_failure;
read_lock_bh(&idev->lock); @@@ -5219,26 -5204,6 +5219,26 @@@ static int inet6_validate_link_af(cons return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); }
+static int check_addr_gen_mode(int mode) +{ + if (mode != IN6_ADDR_GEN_MODE_EUI64 && + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + mode != IN6_ADDR_GEN_MODE_RANDOM) + return -EINVAL; + return 1; +} + +static int check_stable_privacy(struct inet6_dev *idev, struct net *net, + int mode) +{ + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !net->ipv6.devconf_dflt->stable_secret.initialized) + return -EINVAL; + return 1; +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { int err = -EINVAL; @@@ -5260,11 -5225,18 +5260,11 @@@ if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
- if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE && - mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && - mode != IN6_ADDR_GEN_MODE_RANDOM) - return -EINVAL; - - if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && - !idev->cnf.stable_secret.initialized && - !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) + if (check_addr_gen_mode(mode) < 0 || + check_stable_privacy(idev, dev_net(dev), mode) < 0) return -EINVAL;
- idev->addr_gen_mode = mode; + idev->cnf.addr_gen_mode = mode; err = 0; }
@@@ -5568,8 -5540,7 +5568,7 @@@ static void addrconf_disable_change(str struct net_device *dev; struct inet6_dev *idev;
- rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@@ -5578,7 -5549,6 +5577,6 @@@ dev_disable_change(idev); } } - rcu_read_unlock(); }
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) @@@ -5673,47 -5643,6 +5671,47 @@@ int addrconf_sysctl_proxy_ndp(struct ct return ret; }
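The addrconf_sysctl_addr_gen_mode() handler added just below makes the address-generation mode writable at runtime; with the standard per-device conf layout this surfaces as /proc/sys/net/ipv6/conf/<dev>/addr_gen_mode. A minimal sketch of switching a device off automatic link-local generation, assuming the uapi enum ordering (EUI64=0, NONE=1, STABLE_PRIVACY=2, RANDOM=3) and a hypothetical device name:

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv6/conf/eth0/addr_gen_mode";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "1\n");	/* 1 == IN6_ADDR_GEN_MODE_NONE (assumed value) */
	fclose(f);
	return 0;
}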
+static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret = 0; + int new_val; + struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; + struct net *net = (struct net *)ctl->extra2; + + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write) { + new_val = *((int *)ctl->data); + + if (check_addr_gen_mode(new_val) < 0) + return -EINVAL; + + /* request for default */ + if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { + ipv6_devconf_dflt.addr_gen_mode = new_val; + + /* request for individual net device */ + } else { + if (!idev) + return ret; + + if (check_stable_privacy(idev, net, new_val) < 0) + return -EINVAL; + + if (idev->cnf.addr_gen_mode != new_val) { + idev->cnf.addr_gen_mode = new_val; + rtnl_lock(); + addrconf_dev_config(idev->dev); + rtnl_unlock(); + } + } + } + + return ret; +} + static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@@ -5764,14 -5693,14 +5762,14 @@@ struct inet6_dev *idev = __in6_dev_get(dev);
if (idev) { - idev->addr_gen_mode = + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; } } } else { struct inet6_dev *idev = ctl->extra1;
- idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; }
out: @@@ -6159,13 -6088,6 +6157,13 @@@ static const struct ctl_table addrconf_ .proc_handler = proc_dointvec, }, { + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_addr_gen_mode, + }, + { /* sentinel */ } }; diff --combined net/ipv6/inet6_connection_sock.c index 97074c4,75c3082..9a31d13 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@@ -28,6 -28,46 +28,6 @@@ #include <net/inet6_connection_sock.h> #include <net/sock_reuseport.h>
-int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax, - bool reuseport_ok) -{ - const struct sock *sk2; - bool reuse = !!sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; - kuid_t uid = sock_i_uid((struct sock *)sk); - - /* We must walk the whole port owner list in this case. -DaveM */ - /* - * See comment in inet_csk_bind_conflict about sock lookup - * vs net namespaces issues. - */ - sk_for_each_bound(sk2, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, - sock_i_uid((struct sock *)sk2))))) { - if (ipv6_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && - sk2->sk_state != TCP_LISTEN && - ipv6_rcv_saddr_equal(sk, sk2, true)) - break; - } - } - - return sk2 != NULL; -} -EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); - struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, @@@ -136,7 -176,7 +136,7 @@@ int inet6_csk_xmit(struct sock *sk, str /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), np->tclass); rcu_read_unlock(); return res; diff --combined net/ipv6/ip6_gre.c index 1ba7567,5586318..51b9835 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@@ -484,6 -484,11 +484,6 @@@ drop return 0; }
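The inet6_csk_xmit() hunk above is part of the series threading sk->sk_mark into ip6_xmit(), so locally generated IPv6 control packets (TCP/DCCP responses, SCTP) now carry the socket's fwmark and match mark-based ip -6 rules. Setting the mark from userspace is unchanged; a minimal sketch:

#include <sys/socket.h>
#include <stdio.h>

int main(void)
{
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	unsigned int mark = 42;	/* arbitrary example value */

	/* SO_MARK requires CAP_NET_ADMIN */
	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
		perror("setsockopt(SO_MARK)");
	/* connect()/listen() as usual; replies and resets generated for
	 * this socket now inherit the mark too. */
	return 0;
}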
-struct ipv6_tel_txoption { - struct ipv6_txoptions ops; - __u8 dst_opt[8]; -}; - static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, @@@ -577,6 -582,9 +577,9 @@@ static inline int ip6gre_xmit_ipv6(stru return -1;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); + if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; @@@ -993,9 -1001,6 +996,9 @@@ static void ip6gre_tunnel_setup(struct dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); }
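The ipv6h = ipv6_hdr(skb) reload added in ip6gre_xmit_ipv6() above fixes a classic skb pitfall: ip6_tnl_parse_tlv_enc_lim() can pull more header bytes and thereby reallocate skb->head, leaving any previously cached header pointer dangling. The general pattern, sketched with a hypothetical helper:

/* WRONG: hdr may dangle if the helper reallocates skb->head */
struct ipv6hdr *hdr = ipv6_hdr(skb);
maybe_pull_headers(skb);	/* may call pskb_may_pull() internally */
use_addr(&hdr->daddr);		/* potential use-after-free */

/* RIGHT: re-derive header pointers after any call that may pull */
maybe_pull_headers(skb);
hdr = ipv6_hdr(skb);
use_addr(&hdr->daddr);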
static int ip6gre_tunnel_init_common(struct net_device *dev) diff --combined net/ipv6/route.c index 5046d2b,7ea8537..61d7006 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@@ -2711,16 -2711,13 +2711,16 @@@ struct arg_dev_net struct net *net; };
+/* called with write lock held for table with rt */ static int fib6_ifdown(struct rt6_info *rt, void *arg) { const struct arg_dev_net *adn = arg; const struct net_device *dev = adn->dev;
if ((rt->dst.dev == dev || !dev) && - rt != adn->net->ipv6.ip6_null_entry) + rt != adn->net->ipv6.ip6_null_entry && + (rt->rt6i_nsiblings == 0 || + !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) return -1;
return 0; @@@ -2899,6 -2896,11 +2899,11 @@@ static int rtm_to_fib6_config(struct sk if (tb[RTA_MULTIPATH]) { cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + + err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, + cfg->fc_mp_len); + if (err < 0) + goto errout; }
if (tb[RTA_PREF]) { @@@ -2912,9 -2914,14 +2917,14 @@@ if (tb[RTA_ENCAP]) cfg->fc_encap = tb[RTA_ENCAP];
- if (tb[RTA_ENCAP_TYPE]) + if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; + } + if (tb[RTA_EXPIRES]) { unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
@@@ -3172,7 -3179,7 +3182,7 @@@ static int rt6_fill_node(struct net *ne struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, - int prefix, int nowait, unsigned int flags) + unsigned int flags) { u32 metrics[RTAX_MAX]; struct rtmsg *rtm; @@@ -3180,6 -3187,13 +3190,6 @@@ long expires; u32 table;
- if (prefix) { /* user wants prefix routes only */ - if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { - /* success since this is not a prefix route */ - return 1; - } - } - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@@ -3219,7 -3233,7 +3229,7 @@@ else rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (!netif_carrier_ok(rt->dst.dev)) { + if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { rtm->rtm_flags |= RTNH_F_LINKDOWN; if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) rtm->rtm_flags |= RTNH_F_DEAD; @@@ -3257,12 -3271,19 +3267,12 @@@ if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait, - portid); - - if (err <= 0) { - if (!nowait) { - if (err == 0) - return 0; - goto nla_put_failure; - } else { - if (err == -EMSGSIZE) - goto nla_put_failure; - } - } + int err = ip6mr_get_route(net, skb, rtm, portid); + + if (err == 0) + return 0; + if (err < 0) + goto nla_put_failure; } else #endif if (nla_put_u32(skb, RTA_IIF, iif)) @@@ -3320,26 -3341,18 +3330,26 @@@ nla_put_failure int rt6_dump_route(struct rt6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; - int prefix; + struct net *net = arg->net; + + if (rt == net->ipv6.ip6_null_entry) + return 0;
if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); - prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; - } else - prefix = 0;
- return rt6_fill_node(arg->net, + /* user wants prefix routes only */ + if (rtm->rtm_flags & RTM_F_PREFIX && + !(rt->rt6i_flags & RTF_PREFIX_RT)) { + /* success since this is not a prefix route */ + return 1; + } + } + + return rt6_fill_node(net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, - prefix, 0, NLM_F_MULTI); + NLM_F_MULTI); }
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) @@@ -3420,11 -3433,17 +3430,11 @@@ goto errout; }
- /* Reserve room for dummy headers, this skb can pass - through good chunk of routing engine. - */ - skb_reset_mac_header(skb); - skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - skb_dst_set(skb, &rt->dst);
err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, 0, 0); + nlh->nlmsg_seq, 0); if (err < 0) { kfree_skb(skb); goto errout; @@@ -3451,7 -3470,7 +3461,7 @@@ void inet6_rt_notify(int event, struct goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, 0, 0, nlm_flags); + event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --combined net/ipv6/tcp_ipv6.c index 95c05e5,cb89296..64834ec --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@@ -123,7 -123,6 +123,7 @@@ static int tcp_v6_connect(struct sock * struct dst_entry *dst; int addr_type; int err; + struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@@ -259,7 -258,7 +259,7 @@@ sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL);
- if (tcp_death_row.sysctl_tw_recycle && + if (tcp_death_row->sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); @@@ -274,7 -273,7 +274,7 @@@ inet->inet_dport = usin->sin6_port;
tcp_set_state(sk, TCP_SYN_SENT); - err = inet6_hash_connect(&tcp_death_row, sk); + err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure;
@@@ -287,11 -286,6 +287,11 @@@ inet->inet_dport, &tp->tsoffset);
+ if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto late_failure; + err = tcp_connect(sk); if (err) goto late_failure; @@@ -300,6 -294,7 +300,6 @@@
late_failure: tcp_set_state(sk, TCP_CLOSE); - __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; @@@ -474,7 -469,7 +474,7 @@@ static int tcp_v6_send_synack(const str opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } @@@ -845,7 -840,7 +845,7 @@@ static void tcp_v6_send_response(const dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); + ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); @@@ -1625,6 -1620,7 +1625,6 @@@ static const struct inet_connection_soc .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@@ -1655,6 -1651,7 +1655,6 @@@ static const struct inet_connection_soc .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@@ -1747,7 -1744,7 +1747,7 @@@ static void get_tcp6_sock(struct seq_fi srcp = ntohs(inet->inet_sport);
if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; @@@ -1891,7 -1888,6 +1891,7 @@@ struct proto tcpv6_prot = .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, @@@ -1952,7 -1948,7 +1952,7 @@@ static void __net_exit tcpv6_net_exit(s
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); + inet_twsk_purge(&tcp_hashinfo, AF_INET6); }
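tcp_v6_connect() above now calls tcp_fastopen_defer_connect(), the IPv6 side of deferred Fast Open connects: connect() returns immediately and the SYN goes out with the first write, carrying data and the FO option. A minimal userspace sketch of the intended flow; TCP_FASTOPEN_CONNECT and its value are taken from later uapi headers, so treat the fallback define as an assumption:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <stdio.h>

#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT 30	/* assumed sockopt value */
#endif

int main(void)
{
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
				    .sin6_port = htons(80) };
	int one = 1;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr); /* example addr */
	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));

	/* With a cached cookie this returns 0 without emitting a SYN... */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("connect");
	/* ...and the first write puts the data in the SYN payload. */
	if (write(fd, "GET / HTTP/1.0\r\n\r\n", 18) < 0)
		perror("write");
	close(fd);
	return 0;
}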
static struct pernet_operations tcpv6_net_ops = { diff --combined net/mpls/af_mpls.c index 4dc8196,5b77377..64d3bf2 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@@ -8,7 -8,6 +8,7 @@@ #include <linux/ipv6.h> #include <linux/mpls.h> #include <linux/vmalloc.h> +#include <linux/percpu.h> #include <net/ip.h> #include <net/dst.h> #include <net/sock.h> @@@ -18,8 -17,8 +18,8 @@@ #include <net/netns/generic.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> -#include <net/addrconf.h> #endif +#include <net/addrconf.h> #include <net/nexthop.h> #include "internal.h"
@@@ -49,6 -48,11 +49,6 @@@ static struct mpls_route *mpls_route_in return rt; }
-static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->mpls_ptr); -} - bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@@ -94,43 -98,19 +94,44 @@@ bool mpls_pkt_too_big(const struct sk_b } EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
+void mpls_stats_inc_outucastpkts(struct net_device *dev, + const struct sk_buff *skb) +{ + struct mpls_dev *mdev; + + if (skb->protocol == htons(ETH_P_MPLS_UC)) { + mdev = mpls_dev_get(dev); + if (mdev) + MPLS_INC_STATS_LEN(mdev, skb->len, + tx_packets, + tx_bytes); + } else if (skb->protocol == htons(ETH_P_IP)) { + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct inet6_dev *in6dev = __in6_dev_get(dev); + + if (in6dev) + IP6_UPD_PO_STATS(dev_net(dev), in6dev, + IPSTATS_MIB_OUT, skb->len); +#endif + } +} +EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts); + - static u32 mpls_multipath_hash(struct mpls_route *rt, - struct sk_buff *skb, bool bos) + static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) { struct mpls_entry_decoded dec; + unsigned int mpls_hdr_len = 0; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; u32 hash = 0;
- for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS; label_index++) { - if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + mpls_hdr_len += sizeof(*hdr); + if (!pskb_may_pull(skb, mpls_hdr_len)) break;
/* Read and decode the current label */ @@@ -155,37 -135,38 +156,38 @@@ eli_seen = true; }
- bos = dec.bos; - if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct iphdr))) { + if (!dec.bos) + continue; + + /* found bottom label; does skb have room for a header? */ + if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) { const struct iphdr *v4hdr;
- v4hdr = (const struct iphdr *)(mpls_hdr(skb) + - label_index); + v4hdr = (const struct iphdr *)(hdr + 1); if (v4hdr->version == 4) { hash = jhash_3words(ntohl(v4hdr->saddr), ntohl(v4hdr->daddr), v4hdr->protocol, hash); } else if (v4hdr->version == 6 && - pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct ipv6hdr))) { + pskb_may_pull(skb, mpls_hdr_len + + sizeof(struct ipv6hdr))) { const struct ipv6hdr *v6hdr;
- v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + - label_index); - + v6hdr = (const struct ipv6hdr *)(hdr + 1); hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); hash = jhash_1word(v6hdr->nexthdr, hash); } } + + break; }
return hash; }
static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) + struct sk_buff *skb) { int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; @@@ -201,7 -182,7 +203,7 @@@ if (alive <= 0) return NULL;
- hash = mpls_multipath_hash(rt, skb, bos); + hash = mpls_multipath_hash(rt, skb); nh_index = hash % alive; if (alive == rt->rt_nhn) goto out; @@@ -274,7 -255,6 +276,7 @@@ static int mpls_forward(struct sk_buff struct mpls_nh *nh; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *out_mdev; struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; @@@ -284,66 -264,56 +286,66 @@@ /* Careful this entire function runs inside of an rcu critical section */
mdev = mpls_dev_get(dev); - if (!mdev || !mdev->input_enabled) + if (!mdev) goto drop;
- if (skb->pkt_type != PACKET_HOST) + MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets, + rx_bytes); + + if (!mdev->input_enabled) { + MPLS_INC_STATS(mdev, rx_dropped); goto drop; + } + + if (skb->pkt_type != PACKET_HOST) + goto err;
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - goto drop; + goto err;
if (!pskb_may_pull(skb, sizeof(*hdr))) - goto drop; + goto err;
/* Read and decode the label */ hdr = mpls_hdr(skb); dec = mpls_entry_decode(hdr);
- /* Pop the label */ - skb_pull(skb, sizeof(*hdr)); - skb_reset_network_header(skb); - - skb_orphan(skb); - rt = mpls_route_input_rcu(net, dec.label); - if (!rt) + if (!rt) { + MPLS_INC_STATS(mdev, rx_noroute); goto drop; + }
- nh = mpls_select_multipath(rt, skb, dec.bos); + nh = mpls_select_multipath(rt, skb); if (!nh) - goto drop; - - /* Find the output device */ - out_dev = rcu_dereference(nh->nh_dev); - if (!mpls_output_possible(out_dev)) - goto drop; + goto err;
+ /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + if (skb_warn_if_lro(skb)) - goto drop; + goto err;
skb_forward_csum(skb);
/* Verify ttl is valid */ if (dec.ttl <= 1) - goto drop; + goto err; dec.ttl -= 1;
+ /* Find the output device */ + out_dev = rcu_dereference(nh->nh_dev); + if (!mpls_output_possible(out_dev)) + goto tx_err; + /* Verify the destination can hold the packet */ new_header_size = mpls_nh_header_size(nh); mtu = mpls_dev_mtu(out_dev); if (mpls_pkt_too_big(skb, mtu - new_header_size)) - goto drop; + goto tx_err;
hh_len = LL_RESERVED_SPACE(out_dev); if (!out_dev->header_ops) @@@ -351,7 -321,7 +353,7 @@@
/* Ensure there is enough space for the headers in the skb */ if (skb_cow(skb, hh_len + new_header_size)) - goto drop; + goto tx_err;
skb->dev = out_dev; skb->protocol = htons(ETH_P_MPLS_UC); @@@ -359,7 -329,7 +361,7 @@@ if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ if (!mpls_egress(rt, skb, dec)) - goto drop; + goto err; } else { bool bos; int i; @@@ -375,8 -345,6 +377,8 @@@ } }
+ mpls_stats_inc_outucastpkts(out_dev, skb); + /* If via wasn't specified then send out using device address */ if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC) err = neigh_xmit(NEIGH_LINK_TABLE, out_dev, @@@ -389,13 -357,6 +391,13 @@@ __func__, err); return 0;
+tx_err: + out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL; + if (out_mdev) + MPLS_INC_STATS(out_mdev, tx_errors); + goto drop; +err: + MPLS_INC_STATS(mdev, rx_errors); drop: kfree_skb(skb); return NET_RX_DROP; @@@ -894,70 -855,6 +896,70 @@@ errout return err; }
+static void mpls_get_stats(struct mpls_dev *mdev, + struct mpls_link_stats *stats) +{ + struct mpls_pcpu_stats *p; + int i; + + memset(stats, 0, sizeof(*stats)); + + for_each_possible_cpu(i) { + struct mpls_link_stats local; + unsigned int start; + + p = per_cpu_ptr(mdev->stats, i); + do { + start = u64_stats_fetch_begin(&p->syncp); + local = p->stats; + } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += local.rx_packets; + stats->rx_bytes += local.rx_bytes; + stats->tx_packets += local.tx_packets; + stats->tx_bytes += local.tx_bytes; + stats->rx_errors += local.rx_errors; + stats->tx_errors += local.tx_errors; + stats->rx_dropped += local.rx_dropped; + stats->tx_dropped += local.tx_dropped; + stats->rx_noroute += local.rx_noroute; + } +} + +static int mpls_fill_stats_af(struct sk_buff *skb, + const struct net_device *dev) +{ + struct mpls_link_stats *stats; + struct mpls_dev *mdev; + struct nlattr *nla; + + mdev = mpls_dev_get(dev); + if (!mdev) + return -ENODATA; + + nla = nla_reserve_64bit(skb, MPLS_STATS_LINK, + sizeof(struct mpls_link_stats), + MPLS_STATS_UNSPEC); + if (!nla) + return -EMSGSIZE; + + stats = nla_data(nla); + mpls_get_stats(mdev, stats); + + return 0; +} + +static size_t mpls_get_stats_af_size(const struct net_device *dev) +{ + struct mpls_dev *mdev; + + mdev = mpls_dev_get(dev); + if (!mdev) + return 0; + + return nla_total_size_64bit(sizeof(struct mpls_link_stats)); +} + #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ (&((struct mpls_dev *)0)->field)
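mpls_get_stats() above shows the reader side of the new per-CPU counters: a u64_stats_fetch_begin/retry loop per CPU, then summation into one struct. The writer side (the MPLS_INC_STATS*() macros used in mpls_forward()) lives in net/mpls/internal.h and is not part of this hunk; a plausible sketch of that update path, for orientation only:

/* Assumed shape of the increment side (not the actual macro): */
#define MPLS_INC_STATS_LEN_SKETCH(mdev, len, pkts_field, bytes_field)	\
do {									\
	struct mpls_pcpu_stats *p = this_cpu_ptr((mdev)->stats);	\
									\
	local_bh_disable();						\
	u64_stats_update_begin(&p->syncp);				\
	p->stats.pkts_field++;						\
	p->stats.bytes_field += (len);					\
	u64_stats_update_end(&p->syncp);				\
	local_bh_enable();						\
} while (0)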
@@@ -1016,7 -913,6 +1018,7 @@@ static struct mpls_dev *mpls_add_dev(st { struct mpls_dev *mdev; int err = -ENOMEM; + int i;
ASSERT_RTNL();
@@@ -1024,17 -920,6 +1026,17 @@@ if (!mdev) return ERR_PTR(err);
+ mdev->stats = alloc_percpu(struct mpls_pcpu_stats); + if (!mdev->stats) + goto free; + + for_each_possible_cpu(i) { + struct mpls_pcpu_stats *mpls_stats; + + mpls_stats = per_cpu_ptr(mdev->stats, i); + u64_stats_init(&mpls_stats->syncp); + } + err = mpls_dev_sysctl_register(dev, mdev); if (err) goto free; @@@ -1044,19 -929,10 +1046,19 @@@ return mdev;
free: + free_percpu(mdev->stats); kfree(mdev); return ERR_PTR(err); }
+static void mpls_dev_destroy_rcu(struct rcu_head *head) +{ + struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu); + + free_percpu(mdev->stats); + kfree(mdev); +} + static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; @@@ -1171,7 -1047,7 +1173,7 @@@ static int mpls_dev_notify(struct notif if (mdev) { mpls_dev_sysctl_unregister(mdev); RCU_INIT_POINTER(dev->mpls_ptr, NULL); - kfree_rcu(mdev, rcu); + call_rcu(&mdev->rcu, mpls_dev_destroy_rcu); } break; case NETDEV_CHANGENAME: @@@ -1832,12 -1708,6 +1834,12 @@@ static struct pernet_operations mpls_ne .exit = mpls_net_exit, };
+static struct rtnl_af_ops mpls_af_ops __read_mostly = { + .family = AF_MPLS, + .fill_stats_af = mpls_fill_stats_af, + .get_stats_af_size = mpls_get_stats_af_size, +}; + static int __init mpls_init(void) { int err; @@@ -1854,8 -1724,6 +1856,8 @@@
dev_add_pack(&mpls_packet_type);
+ rtnl_af_register(&mpls_af_ops); + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); @@@ -1872,7 -1740,6 +1874,7 @@@ module_init(mpls_init) static void __exit mpls_exit(void) { rtnl_unregister_all(PF_MPLS); + rtnl_af_unregister(&mpls_af_ops); dev_remove_pack(&mpls_packet_type); unregister_netdevice_notifier(&mpls_dev_notifier); unregister_pernet_subsys(&mpls_net_ops); diff --combined net/mpls/mpls_iptunnel.c index 0253128,1d281c1..67b7a95 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@@ -48,15 -48,11 +48,15 @@@ static int mpls_xmit(struct sk_buff *sk struct dst_entry *dst = skb_dst(skb); struct rtable *rt = NULL; struct rt6_info *rt6 = NULL; + struct mpls_dev *out_mdev; int err = 0; bool bos; int i; unsigned int ttl;
+ /* Find the output device */ + out_dev = dst->dev; + /* Obtain the ttl */ if (dst->ops->family == AF_INET) { ttl = ip_hdr(skb)->ttl; @@@ -70,6 -66,8 +70,6 @@@
skb_orphan(skb);
- /* Find the output device */ - out_dev = dst->dev; if (!mpls_output_possible(out_dev) || !dst->lwtstate || skb_warn_if_lro(skb)) goto drop; @@@ -111,8 -109,6 +111,8 @@@ bos = false; }
+ mpls_stats_inc_outucastpkts(out_dev, skb); + if (rt) err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, skb); @@@ -126,9 -122,6 +126,9 @@@ return LWTUNNEL_XMIT_DONE;
drop: + out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL; + if (out_mdev) + MPLS_INC_STATS(out_mdev, tx_errors); kfree_skb(skb); return -EINVAL; } @@@ -222,6 -215,7 +222,7 @@@ static const struct lwtunnel_encap_ops .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, + .owner = THIS_MODULE, };
static int __init mpls_iptunnel_init(void) diff --combined net/packet/af_packet.c index ddbda25,3d555c7..9854baa --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -409,9 -409,6 +409,9 @@@ static void __packet_set_status(struct flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: + h.h3->tp_status = status; + flush_dcache_page(pgv_to_page(&h.h3->tp_status)); + break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); @@@ -435,8 -432,6 +435,8 @@@ static int __packet_get_status(struct p flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; case TPACKET_V3: + flush_dcache_page(pgv_to_page(&h.h3->tp_status)); + return h.h3->tp_status; default: WARN(1, "TPACKET version not supported.\n"); BUG(); @@@ -481,9 -476,6 +481,9 @@@ static __u32 __packet_set_timestamp(str h.h2->tp_nsec = ts.tv_nsec; break; case TPACKET_V3: + h.h3->tp_sec = ts.tv_sec; + h.h3->tp_nsec = ts.tv_nsec; + break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); @@@ -1984,7 -1976,7 +1984,7 @@@ static int packet_rcv_vnet(struct msghd return -EINVAL; *len -= sizeof(vnet_hdr);
- if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le())) + if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true)) return -EINVAL;
return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); @@@ -2245,7 -2237,7 +2245,7 @@@ static int tpacket_rcv(struct sk_buff * if (po->has_vnet_hdr) { if (virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le())) { + vio_le(), true)) { spin_lock(&sk->sk_receive_queue.lock); goto drop_n_account; } @@@ -2505,13 -2497,6 +2505,13 @@@ static int tpacket_parse_header(struct ph.raw = frame;
switch (po->tp_version) { + case TPACKET_V3: + if (ph.h3->tp_next_offset != 0) { + pr_warn_once("variable sized slot not supported"); + return -EINVAL; + } + tp_len = ph.h3->tp_len; + break; case TPACKET_V2: tp_len = ph.h2->tp_len; break; @@@ -2531,9 -2516,6 +2531,9 @@@ off_max = po->tx_ring.frame_size - tp_len; if (po->sk.sk_type == SOCK_DGRAM) { switch (po->tp_version) { + case TPACKET_V3: + off = ph.h3->tp_net; + break; case TPACKET_V2: off = ph.h2->tp_net; break; @@@ -2543,9 -2525,6 +2543,9 @@@ } } else { switch (po->tp_version) { + case TPACKET_V3: + off = ph.h3->tp_mac; + break; case TPACKET_V2: off = ph.h2->tp_mac; break; @@@ -4134,6 -4113,11 +4134,6 @@@ static int packet_set_ring(struct sock struct tpacket_req *req = &req_u->req;
lock_sock(sk); - /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ - if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { - net_warn_ratelimited("Tx-ring is not supported.\n"); - goto out; - }
rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; @@@ -4193,19 -4177,11 +4193,19 @@@ goto out; switch (po->tp_version) { case TPACKET_V3: - /* Transmit path is not supported. We checked - * it above but just being paranoid - */ - if (!tx_ring) + /* Block transmit is not supported yet */ + if (!tx_ring) { init_prb_bdqc(po, rb, pg_vec, req_u); + } else { + struct tpacket_req3 *req3 = &req_u->req3; + + if (req3->tp_retire_blk_tov || + req3->tp_sizeof_priv || + req3->tp_feature_req_word) { + err = -EINVAL; + goto out; + } + } break; default: break; diff --combined net/sctp/ipv6.c index 6619367,64dfd35..063baac --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@@ -222,7 -222,8 +222,8 @@@ static int sctp_v6_xmit(struct sk_buff SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
rcu_read_lock(); - res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } @@@ -412,20 -413,22 +413,20 @@@ static void sctp_v6_copy_addrlist(struc static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { - __be16 *port; - struct sctphdr *sh; + /* Always called on head skb, so this is safe */ + struct sctphdr *sh = sctp_hdr(skb); + struct sockaddr_in6 *sa = &addr->v6;
- port = &addr->v6.sin6_port; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; /* FIXME */ addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
- /* Always called on head skb, so this is safe */ - sh = sctp_hdr(skb); if (is_saddr) { - *port = sh->source; - addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; + sa->sin6_port = sh->source; + sa->sin6_addr = ipv6_hdr(skb)->saddr; } else { - *port = sh->dest; - addr->v6.sin6_addr = ipv6_hdr(skb)->daddr; + sa->sin6_port = sh->dest; + sa->sin6_addr = ipv6_hdr(skb)->daddr; } }
diff --combined net/sctp/socket.c
index d699d2c,37eeab7..5fc7122
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@@ -235,8 -235,12 +235,12 @@@ static struct sctp_transport *sctp_addr
  					      sctp_assoc_t id)
  {
  	struct sctp_association *addr_asoc = NULL, *id_asoc = NULL;
- 	struct sctp_transport *transport;
+ 	struct sctp_af *af = sctp_get_af_specific(addr->ss_family);
  	union sctp_addr *laddr = (union sctp_addr *)addr;
+ 	struct sctp_transport *transport;
+ 
+ 	if (sctp_verify_addr(sk, laddr, af->sockaddr_len))
+ 		return NULL;

  	addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,
  					       laddr,
@@@ -360,7 -364,7 +364,7 @@@ static int sctp_do_bind(struct sock *sk
  		}
  	}

- 	if (snum && snum < PROT_SOCK &&
+ 	if (snum && snum < inet_prot_sock(net) &&
  	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
  		return -EACCES;

@@@ -1152,10 -1156,8 +1156,10 @@@ static int __sctp_connect(struct sock *
  			 * accept new associations, but it SHOULD NOT
  			 * be permitted to open new associations.
  			 */
- 			if (ep->base.bind_addr.port < PROT_SOCK &&
- 			    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
+ 			if (ep->base.bind_addr.port <
+ 			    inet_prot_sock(net) &&
+ 			    !ns_capable(net->user_ns,
+ 					CAP_NET_BIND_SERVICE)) {
  				err = -EACCES;
  				goto out_free;
  			}
@@@ -1820,7 -1822,7 +1824,7 @@@ static int sctp_sendmsg(struct sock *sk
  		 * but it SHOULD NOT be permitted to open new
  		 * associations.
  		 */
- 		if (ep->base.bind_addr.port < PROT_SOCK &&
+ 		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
  		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
  			err = -EACCES;
  			goto out_unlock;
@@@ -2432,6 -2434,7 +2436,6 @@@ static int sctp_apply_peer_addr_params(
  			sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
  		} else if (asoc) {
  			asoc->pathmtu = params->spp_pathmtu;
- 			sctp_frag_point(asoc, params->spp_pathmtu);
  		} else {
  			sp->pathmtu = params->spp_pathmtu;
  		}
@@@ -3752,68 -3755,6 +3756,68 @@@ out
  	return retval;
  }

+ static int sctp_setsockopt_enable_strreset(struct sock *sk,
+ 					   char __user *optval,
+ 					   unsigned int optlen)
+ {
+ 	struct sctp_assoc_value params;
+ 	struct sctp_association *asoc;
+ 	int retval = -EINVAL;
+ 
+ 	if (optlen != sizeof(params))
+ 		goto out;
+ 
+ 	if (copy_from_user(&params, optval, optlen)) {
+ 		retval = -EFAULT;
+ 		goto out;
+ 	}
+ 
+ 	if (params.assoc_value & (~SCTP_ENABLE_STRRESET_MASK))
+ 		goto out;
+ 
+ 	asoc = sctp_id2assoc(sk, params.assoc_id);
+ 	if (asoc) {
+ 		asoc->strreset_enable = params.assoc_value;
+ 	} else if (!params.assoc_id) {
+ 		struct sctp_sock *sp = sctp_sk(sk);
+ 
+ 		sp->ep->strreset_enable = params.assoc_value;
+ 	} else {
+ 		goto out;
+ 	}
+ 
+ 	retval = 0;
+ 
+ out:
+ 	return retval;
+ }
+ 
+ static int sctp_setsockopt_reset_streams(struct sock *sk,
+ 					 char __user *optval,
+ 					 unsigned int optlen)
+ {
+ 	struct sctp_reset_streams *params;
+ 	struct sctp_association *asoc;
+ 	int retval = -EINVAL;
+ 
+ 	if (optlen < sizeof(struct sctp_reset_streams))
+ 		return -EINVAL;
+ 
+ 	params = memdup_user(optval, optlen);
+ 	if (IS_ERR(params))
+ 		return PTR_ERR(params);
+ 
+ 	asoc = sctp_id2assoc(sk, params->srs_assoc_id);
+ 	if (!asoc)
+ 		goto out;
+ 
+ 	retval = sctp_send_reset_streams(asoc, params);
+ 
+ out:
+ 	kfree(params);
+ 	return retval;
+ }
+ 
  /* API 6.2 setsockopt(), getsockopt()
   *
   * Applications use setsockopt() and getsockopt() to set or retrieve
@@@ -3980,12 -3921,6 +3984,12 @@@ static int sctp_setsockopt(struct sock
  	case SCTP_DEFAULT_PRINFO:
  		retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
  		break;
+ 	case SCTP_ENABLE_STREAM_RESET:
+ 		retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
+ 		break;
+ 	case SCTP_RESET_STREAMS:
+ 		retval = sctp_setsockopt_reset_streams(sk, optval, optlen);
+ 		break;
  	default:
  		retval = -ENOPROTOOPT;
  		break;
@@@ -6470,47 -6405,6 +6474,47 @@@ out
  	return retval;
  }

+ static int sctp_getsockopt_enable_strreset(struct sock *sk, int len,
+ 					   char __user *optval,
+ 					   int __user *optlen)
+ {
+ 	struct sctp_assoc_value params;
+ 	struct sctp_association *asoc;
+ 	int retval = -EFAULT;
+ 
+ 	if (len < sizeof(params)) {
+ 		retval = -EINVAL;
+ 		goto out;
+ 	}
+ 
+ 	len = sizeof(params);
+ 	if (copy_from_user(&params, optval, len))
+ 		goto out;
+ 
+ 	asoc = sctp_id2assoc(sk, params.assoc_id);
+ 	if (asoc) {
+ 		params.assoc_value = asoc->strreset_enable;
+ 	} else if (!params.assoc_id) {
+ 		struct sctp_sock *sp = sctp_sk(sk);
+ 
+ 		params.assoc_value = sp->ep->strreset_enable;
+ 	} else {
+ 		retval = -EINVAL;
+ 		goto out;
+ 	}
+ 
+ 	if (put_user(len, optlen))
+ 		goto out;
+ 
+ 	if (copy_to_user(optval, &params, len))
+ 		goto out;
+ 
+ 	retval = 0;
+ 
+ out:
+ 	return retval;
+ }
+ 
  static int sctp_getsockopt(struct sock *sk, int level, int optname,
  			   char __user *optval, int __user *optlen)
  {
@@@ -6678,10 -6572,6 +6682,10 @@@
  		retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
  							optlen);
  		break;
+ 	case SCTP_ENABLE_STREAM_RESET:
+ 		retval = sctp_getsockopt_enable_strreset(sk, len, optval,
+ 							 optlen);
+ 		break;
  	default:
  		retval = -ENOPROTOOPT;
  		break;
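The socket.c hunks wire up two new socket options for RFC 6525 stream reconfiguration: SCTP_ENABLE_STREAM_RESET toggles which reconf requests an endpoint or association accepts (masked by SCTP_ENABLE_STRRESET_MASK), and SCTP_RESET_STREAMS hands a list of streams to sctp_send_reset_streams(). A hedged userspace sketch follows; it assumes a <linux/sctp.h> new enough to carry the structures and flags this series adds to the uapi headers.

#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/sctp.h>
#include <stdlib.h>

/* Ask the peer to reset outgoing stream 1 on 'assoc_id'.  Stream
 * reset requests must first be enabled on the endpoint/association. */
int reset_outgoing_stream(int fd, sctp_assoc_t assoc_id)
{
	struct sctp_assoc_value enable = {
		.assoc_id    = assoc_id,
		.assoc_value = SCTP_ENABLE_RESET_STREAM_REQ,
	};
	struct sctp_reset_streams *srs;
	size_t len = sizeof(*srs) + sizeof(__u16);	/* one stream id */
	int err;

	if (setsockopt(fd, IPPROTO_SCTP, SCTP_ENABLE_STREAM_RESET,
		       &enable, sizeof(enable)) < 0)
		return -1;

	srs = calloc(1, len);
	if (!srs)
		return -1;
	srs->srs_assoc_id = assoc_id;
	srs->srs_flags = SCTP_STREAM_RESET_OUTGOING;
	srs->srs_number_streams = 1;
	srs->srs_stream_list[0] = 1;

	err = setsockopt(fd, IPPROTO_SCTP, SCTP_RESET_STREAMS, srs, len);
	free(srs);
	return err;
}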
diff --combined net/tipc/node.c
index f96dacf,2775332..e9295fa
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@@ -263,6 -263,11 +263,11 @@@ static void tipc_node_write_lock(struc
  	write_lock_bh(&n->lock);
  }

+ static void tipc_node_write_unlock_fast(struct tipc_node *n)
+ {
+ 	write_unlock_bh(&n->lock);
+ }
+ 
  static void tipc_node_write_unlock(struct tipc_node *n)
  {
  	struct net *net = n->net;
@@@ -417,7 -422,7 +422,7 @@@ void tipc_node_subscribe(struct net *ne
  	}
  	tipc_node_write_lock(n);
  	list_add_tail(subscr, &n->publ_list);
- 	tipc_node_write_unlock(n);
+ 	tipc_node_write_unlock_fast(n);
  	tipc_node_put(n);
  }

@@@ -435,7 -440,7 +440,7 @@@ void tipc_node_unsubscribe(struct net *
  	}
  	tipc_node_write_lock(n);
  	list_del_init(subscr);
- 	tipc_node_write_unlock(n);
+ 	tipc_node_write_unlock_fast(n);
  	tipc_node_put(n);
  }
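tipc_node_write_unlock_fast() exists because the full tipc_node_write_unlock(), whose body is not shown in this hunk, does extra work after dropping the lock to publish node state changes; subscribe/unsubscribe cannot cause such changes since they only touch publ_list, so they can take the cheap exit. A paraphrased illustration of the split, with hypothetical helpers standing in for the real post-unlock logic:

/* Paraphrased sketch, not the real tipc_node_write_unlock() body;
 * node_state_changed() and publish_node_state() are hypothetical. */
static void example_write_unlock(struct tipc_node *n)
{
	bool changed = node_state_changed(n);	/* hypothetical check */

	write_unlock_bh(&n->lock);
	if (changed)
		publish_node_state(n);		/* hypothetical fan-out */
}

/* Callers that cannot change node state skip all of that: */
static void example_write_unlock_fast(struct tipc_node *n)
{
	write_unlock_bh(&n->lock);
}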
@@@ -1167,7 -1172,7 +1172,7 @@@ msg_full
   * @list: chain of buffers containing message
   * @dnode: address of destination node
   * @selector: a number used for deterministic link selection
-  * Consumes the buffer chain, except when returning -ELINKCONG
+  * Consumes the buffer chain.
   * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
   */
  int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
@@@ -1206,10 -1211,10 +1211,10 @@@
  	spin_unlock_bh(&le->lock);
  	tipc_node_read_unlock(n);

- 	if (likely(rc == 0))
- 		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
- 	else if (rc == -ENOBUFS)
+ 	if (unlikely(rc == -ENOBUFS))
  		tipc_node_link_down(n, bearer_id, false);
+ 	else
+ 		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);

  	tipc_node_put(n);

@@@ -1221,15 -1226,20 +1226,15 @@@
   * messages, which will not be rejected
   * The only exception is datagram messages rerouted after secondary
   * lookup, which are rare and safe to dispose of anyway.
-  * TODO: Return real return value, and let callers use
-  * tipc_wait_for_sendpkt() where applicable
   */
  int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
  		       u32 selector)
  {
  	struct sk_buff_head head;
- 	int rc;

  	skb_queue_head_init(&head);
  	__skb_queue_tail(&head, skb);
- 	rc = tipc_node_xmit(net, &head, dnode, selector);
- 	if (rc == -ELINKCONG)
- 		kfree_skb(skb);
+ 	tipc_node_xmit(net, &head, dnode, selector);
  	return 0;
  }
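With the TODO and the -ELINKCONG special case removed, tipc_node_xmit() and tipc_node_xmit_skb() now consume the buffer chain unconditionally; a congested link queues the messages instead of bouncing them back to the caller. The resulting ownership rule for callers, sketched with a hypothetical wrapper:

/* Hedged sketch, not from the patch: tipc_example_send() is
 * hypothetical and only illustrates the new buffer-ownership rule. */
static int tipc_example_send(struct net *net, struct sk_buff *skb,
			     u32 dnode, u32 selector)
{
	/* The skb is consumed whatever happens, -ELINKCONG included,
	 * so there is no kfree_skb() fallback and the skb must not be
	 * referenced after this call. */
	return tipc_node_xmit_skb(net, skb, dnode, selector);
}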
@@@ -1257,19 -1267,6 +1262,19 @@@ void tipc_node_broadcast(struct net *ne
  	kfree_skb(skb);
  }

+ static void tipc_node_mcast_rcv(struct tipc_node *n)
+ {
+ 	struct tipc_bclink_entry *be = &n->bc_entry;
+ 
+ 	/* 'arrvq' is under inputq2's lock protection */
+ 	spin_lock_bh(&be->inputq2.lock);
+ 	spin_lock_bh(&be->inputq1.lock);
+ 	skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
+ 	spin_unlock_bh(&be->inputq1.lock);
+ 	spin_unlock_bh(&be->inputq2.lock);
+ 	tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2);
+ }
+ 
  static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
  				  int bearer_id, struct sk_buff_head *xmitq)
  {
@@@ -1343,8 -1340,15 +1348,8 @@@ static void tipc_node_bc_rcv(struct ne
  	if (!skb_queue_empty(&xmitq))
  		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);

- 	/* Deliver. 'arrvq' is under inputq2's lock protection */
- 	if (!skb_queue_empty(&be->inputq1)) {
- 		spin_lock_bh(&be->inputq2.lock);
- 		spin_lock_bh(&be->inputq1.lock);
- 		skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
- 		spin_unlock_bh(&be->inputq1.lock);
- 		spin_unlock_bh(&be->inputq2.lock);
- 		tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2);
- 	}
+ 	if (!skb_queue_empty(&be->inputq1))
+ 		tipc_node_mcast_rcv(n);

  	if (rc & TIPC_LINK_DOWN_EVT) {
  		/* Reception reassembly failure => reset all links to peer */
@@@ -1571,9 -1575,6 +1576,9 @@@ void tipc_rcv(struct net *net, struct s
  	if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
  		tipc_named_rcv(net, &n->bc_entry.namedq);

+ 	if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1)))
+ 		tipc_node_mcast_rcv(n);
+ 
  	if (!skb_queue_empty(&le->inputq))
  		tipc_sk_rcv(net, &le->inputq);