[linux-next] LinuxNextTracking branch, master, updated. next-20160224
by batman(a)open-mesh.org
The following commit has been merged in the master branch:
commit b633353115e352d3c31c12d4c61978c810f05ea1
Merge: b1d95ae5c5bd3deba84d00c4f83d7d0836b5936f dea08e604408d0303e2332896c5fdd8c1f7d79a2
Author: David S. Miller <davem(a)davemloft.net>
Date: Tue Feb 23 00:09:14 2016 -0500
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
drivers/net/phy/bcm7xxx.c
drivers/net/phy/marvell.c
drivers/net/vxlan.c
All three conflicts were cases of simple overlapping changes.
Signed-off-by: David S. Miller <davem(a)davemloft.net>
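For context, a "simple overlapping changes" conflict is one where both branches modified the same region independently and the resolution is mechanical: keep both sides' intent. A minimal hypothetical illustration (not taken from the three conflicted files above) of what git leaves in the working tree and how such a conflict is typically resolved:
	/* As git presents the conflict: */
	<<<<<<< HEAD
		err = setup_feature_a(dev);	/* hypothetical change from net-next */
	=======
		err = setup_feature_b(dev);	/* hypothetical change from net */
	>>>>>>> net
	/* Resolution for overlapping-but-independent changes: keep both. */
		err = setup_feature_a(dev);
		if (!err)
			err = setup_feature_b(dev);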
diff --combined MAINTAINERS
index 355e1c8,2fa3303..27393cf
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -223,9 -223,7 +223,7 @@@ F: drivers/scsi/aacraid
ABI/API
L: linux-api(a)vger.kernel.org
- F: Documentation/ABI/
F: include/linux/syscalls.h
- F: include/uapi/
F: kernel/sys_ni.c
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
@@@ -686,13 -684,6 +684,6 @@@ M: Michael Hanselmann <linux-kernel@han
S: Supported
F: drivers/macintosh/ams/
- AMSO1100 RNIC DRIVER
- M: Tom Tucker <tom(a)opengridcomputing.com>
- M: Steve Wise <swise(a)opengridcomputing.com>
- L: linux-rdma(a)vger.kernel.org
- S: Maintained
- F: drivers/infiniband/hw/amso1100/
-
ANALOG DEVICES INC AD9389B DRIVER
M: Hans Verkuil <hans.verkuil(a)cisco.com>
L: linux-media(a)vger.kernel.org
@@@ -967,6 -958,8 +958,8 @@@ M: Rob Herring <robh(a)kernel.org
L: linux-arm-kernel(a)lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-highbank/
+ F: arch/arm/boot/dts/highbank.dts
+ F: arch/arm/boot/dts/ecx-*.dts*
ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT
M: Krzysztof Halasa <khalasa(a)piap.pl>
@@@ -1042,6 -1035,7 +1035,7 @@@ M: Barry Song <baohua(a)kernel.org
L: linux-arm-kernel(a)lists.infradead.org (moderated for non-subscribers)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
S: Maintained
+ F: arch/arm/boot/dts/prima2*
F: arch/arm/mach-prima2/
F: drivers/clk/sirf/
F: drivers/clocksource/timer-prima2.c
@@@ -1143,6 -1137,10 +1137,10 @@@ W: http://www.hisilicon.co
S: Supported
T: git git://github.com/hisilicon/linux-hisi.git
F: arch/arm/mach-hisi/
+ F: arch/arm/boot/dts/hi3*
+ F: arch/arm/boot/dts/hip*
+ F: arch/arm/boot/dts/hisi*
+ F: arch/arm64/boot/dts/hisilicon/
ARM/HP JORNADA 7XX MACHINE SUPPORT
M: Kristoffer Ericson <kristoffer.ericson(a)gmail.com>
@@@ -1219,6 -1217,7 +1217,7 @@@ M: Santosh Shilimkar <ssantosh(a)kernel.o
L: linux-arm-kernel(a)lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-keystone/
+ F: arch/arm/boot/dts/k2*
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
@@@ -1287,6 -1286,7 +1286,7 @@@ L: linux-arm-kernel(a)lists.infradead.or
S: Maintained
F: arch/arm/mach-berlin/
F: arch/arm/boot/dts/berlin*
+ F: arch/arm64/boot/dts/marvell/berlin*
ARM/Marvell Dove/MV78xx0/Orion SOC support
@@@ -1425,6 -1425,7 +1425,7 @@@ S: Maintaine
F: arch/arm/boot/dts/qcom-*.dts
F: arch/arm/boot/dts/qcom-*.dtsi
F: arch/arm/mach-qcom/
+ F: arch/arm64/boot/dts/qcom/*
F: drivers/soc/qcom/
F: drivers/tty/serial/msm_serial.h
F: drivers/tty/serial/msm_serial.c
@@@ -1441,8 -1442,8 +1442,8 @@@ S: Maintaine
ARM/RENESAS ARM64 ARCHITECTURE
M: Simon Horman <horms(a)verge.net.au>
M: Magnus Damm <magnus.damm(a)gmail.com>
- L: linux-sh(a)vger.kernel.org
- Q: http://patchwork.kernel.org/project/linux-sh/list/
+ L: linux-renesas-soc(a)vger.kernel.org
+ Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
S: Supported
F: arch/arm64/boot/dts/renesas/
@@@ -1484,6 -1485,8 +1485,8 @@@ L: linux-arm-kernel(a)lists.infradead.or
L: linux-samsung-soc(a)vger.kernel.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/boot/dts/s3c*
+ F: arch/arm/boot/dts/s5p*
+ F: arch/arm/boot/dts/samsung*
F: arch/arm/boot/dts/exynos*
F: arch/arm64/boot/dts/exynos/
F: arch/arm/plat-samsung/
@@@ -1563,6 -1566,7 +1566,7 @@@ S: Maintaine
F: arch/arm/mach-socfpga/
F: arch/arm/boot/dts/socfpga*
F: arch/arm/configs/socfpga_defconfig
+ F: arch/arm64/boot/dts/altera/
W: http://www.rocketboards.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
@@@ -1716,7 -1720,7 +1720,7 @@@ M: Lorenzo Pieralisi <lorenzo.pieralisi
L: linux-arm-kernel(a)lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/boot/dts/vexpress*
- F: arch/arm64/boot/dts/arm/vexpress*
+ F: arch/arm64/boot/dts/arm/
F: arch/arm/mach-vexpress/
F: */*/vexpress*
F: */*/*/vexpress*
@@@ -2147,7 -2151,7 +2151,7 @@@ M: Marek Lindner <mareklindner@neomailb
M: Simon Wunderlich <sw(a)simonwunderlich.de>
M: Antonio Quartulli <a(a)unstable.cc>
L: b.a.t.m.a.n(a)lists.open-mesh.org
-W: http://www.open-mesh.org/
+W: https://www.open-mesh.org/
S: Maintained
F: net/batman-adv/
@@@ -2343,6 -2347,7 +2347,7 @@@ F: arch/arm/mach-bcm
F: arch/arm/boot/dts/bcm113*
F: arch/arm/boot/dts/bcm216*
F: arch/arm/boot/dts/bcm281*
+ F: arch/arm64/boot/dts/broadcom/
F: arch/arm/configs/bcm_defconfig
F: drivers/mmc/host/sdhci-bcm-kona.c
F: drivers/clocksource/bcm_kona_timer.c
@@@ -2357,14 -2362,6 +2362,6 @@@ T: git git://git.kernel.org/pub/scm/lin
S: Maintained
N: bcm2835
- BROADCOM BCM33XX MIPS ARCHITECTURE
- M: Kevin Cernekee <cernekee(a)gmail.com>
- L: linux-mips(a)linux-mips.org
- S: Maintained
- F: arch/mips/bcm3384/*
- F: arch/mips/include/asm/mach-bcm3384/*
- F: arch/mips/kernel/*bmips*
-
BROADCOM BCM47XX MIPS ARCHITECTURE
M: Hauke Mehrtens <hauke(a)hauke-m.de>
M: Rafał Miłecki <zajec5(a)gmail.com>
@@@ -2424,7 -2421,6 +2421,7 @@@ F: include/linux/bcm963xx_nvram.
F: include/linux/bcm963xx_tag.h
BROADCOM TG3 GIGABIT ETHERNET DRIVER
+M: Siva Reddy Kallam <siva.kallam(a)broadcom.com>
M: Prashant Sreedharan <prashant(a)broadcom.com>
M: Michael Chan <mchan(a)broadcom.com>
L: netdev(a)vger.kernel.org
@@@ -3446,7 -3442,7 +3443,7 @@@ S: Maintaine
F: drivers/usb/dwc2/
DESIGNWARE USB3 DRD IP DRIVER
- M: Felipe Balbi <balbi(a)ti.com>
+ M: Felipe Balbi <balbi(a)kernel.org>
L: linux-usb(a)vger.kernel.org
L: linux-omap(a)vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@@ -4185,13 -4181,6 +4182,6 @@@ W: http://aeschi.ch.eu.org/efs
S: Orphan
F: fs/efs/
- EHCA (IBM GX bus InfiniBand adapter) DRIVER
- M: Hoang-Nam Nguyen <hnguyen(a)de.ibm.com>
- M: Christoph Raisch <raisch(a)de.ibm.com>
- L: linux-rdma(a)vger.kernel.org
- S: Supported
- F: drivers/infiniband/hw/ehca/
-
EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
M: Thadeu Lima de Souza Cascardo <cascardo(a)linux.vnet.ibm.com>
L: netdev(a)vger.kernel.org
@@@ -5810,12 -5799,6 +5800,6 @@@ M: Juanjo Ciarlante <jjciarla(a)raiz.uncu
S: Maintained
F: net/ipv4/netfilter/ipt_MASQUERADE.c
- IPATH DRIVER
- M: Mike Marciniszyn <infinipath(a)intel.com>
- L: linux-rdma(a)vger.kernel.org
- S: Maintained
- F: drivers/staging/rdma/ipath/
-
IPMI SUBSYSTEM
M: Corey Minyard <minyard(a)acm.org>
L: openipmi-developer(a)lists.sourceforge.net (moderated for non-subscribers)
@@@ -6145,7 -6128,7 +6129,7 @@@ F: include/uapi/linux/sunrpc
KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <shuahkh(a)osg.samsung.com>
- L: linux-api(a)vger.kernel.org
+ L: linux-kselftest(a)vger.kernel.org
T: git git://git.kernel.org/pub/scm/shuah/linux-kselftest
S: Maintained
F: tools/testing/selftests
@@@ -7371,7 -7354,7 +7355,7 @@@ F: drivers/tty/isicom.
F: include/linux/isicom.h
MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
- M: Felipe Balbi <balbi(a)ti.com>
+ M: Felipe Balbi <balbi(a)kernel.org>
L: linux-usb(a)vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
S: Maintained
@@@ -7940,7 -7923,7 +7924,7 @@@ F: drivers/media/platform/omap3isp
F: drivers/staging/media/omap4iss/
OMAP USB SUPPORT
- M: Felipe Balbi <balbi(a)ti.com>
+ M: Felipe Balbi <balbi(a)kernel.org>
L: linux-usb(a)vger.kernel.org
L: linux-omap(a)vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@@ -8819,6 -8802,7 +8803,7 @@@ L: linux-arm-kernel(a)lists.infradead.or
T: git git://github.com/hzhuang1/linux.git
T: git git://github.com/rjarzmik/linux.git
S: Maintained
+ F: arch/arm/boot/dts/pxa*
F: arch/arm/mach-pxa/
F: drivers/dma/pxa*
F: drivers/pcmcia/pxa2xx*
@@@ -8848,6 -8832,7 +8833,7 @@@ L: linux-arm-kernel(a)lists.infradead.or
T: git git://github.com/hzhuang1/linux.git
T: git git://git.linaro.org/people/ycmiao/pxa-linux.git
S: Maintained
+ F: arch/arm/boot/dts/mmp*
F: arch/arm/mach-mmp/
PXA MMCI DRIVER
@@@ -9794,10 -9779,11 +9780,11 @@@ S: Supporte
F: drivers/scsi/be2iscsi/
Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER
- M: Sathya Perla <sathya.perla(a)avagotech.com>
- M: Ajit Khaparde <ajit.khaparde(a)avagotech.com>
- M: Padmanabh Ratnakar <padmanabh.ratnakar(a)avagotech.com>
- M: Sriharsha Basavapatna <sriharsha.basavapatna(a)avagotech.com>
+ M: Sathya Perla <sathya.perla(a)broadcom.com>
+ M: Ajit Khaparde <ajit.khaparde(a)broadcom.com>
+ M: Padmanabh Ratnakar <padmanabh.ratnakar(a)broadcom.com>
+ M: Sriharsha Basavapatna <sriharsha.basavapatna(a)broadcom.com>
+ M: Somnath Kotur <somnath.kotur(a)broadcom.com>
L: netdev(a)vger.kernel.org
W: http://www.emulex.com
S: Supported
@@@ -10159,6 -10145,7 +10146,7 @@@ S: Supporte
F: drivers/media/pci/solo6x10/
SOFTWARE RAID (Multiple Disks) SUPPORT
+ M: Shaohua Li <shli(a)kernel.org>
L: linux-raid(a)vger.kernel.org
T: git git://neil.brown.name/md
S: Supported
@@@ -10292,6 -10279,7 +10280,7 @@@ L: spear-devel(a)list.st.co
L: linux-arm-kernel(a)lists.infradead.org (moderated for non-subscribers)
W: http://www.st.com/spear
S: Maintained
+ F: arch/arm/boot/dts/spear*
F: arch/arm/mach-spear/
SPEAR CLOCK FRAMEWORK SUPPORT
@@@ -11319,7 -11307,7 +11308,7 @@@ F: Documentation/usb/ehci.tx
F: drivers/usb/host/ehci*
USB GADGET/PERIPHERAL SUBSYSTEM
- M: Felipe Balbi <balbi(a)ti.com>
+ M: Felipe Balbi <balbi(a)kernel.org>
L: linux-usb(a)vger.kernel.org
W: http://www.linux-usb.org/gadget
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@@ -11395,7 -11383,7 +11384,7 @@@ S: Maintaine
F: drivers/net/usb/pegasus.*
USB PHY LAYER
- M: Felipe Balbi <balbi(a)ti.com>
+ M: Felipe Balbi <balbi(a)kernel.org>
L: linux-usb(a)vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
S: Maintained
@@@ -12025,7 -12013,6 +12014,6 @@@ F: arch/arm64/xen
F: arch/arm64/include/asm/xen/
XEN NETWORK BACKEND DRIVER
- M: Ian Campbell <ian.campbell(a)citrix.com>
M: Wei Liu <wei.liu2(a)citrix.com>
L: xen-devel(a)lists.xenproject.org (moderated for non-subscribers)
L: netdev(a)vger.kernel.org
@@@ -12134,7 -12121,7 +12122,7 @@@ F: drivers/net/hamradio/*scc.
F: drivers/net/hamradio/z8530.h
ZBUD COMPRESSED PAGE ALLOCATOR
- M: Seth Jennings <sjennings(a)variantweb.net>
+ M: Seth Jennings <sjenning(a)redhat.com>
L: linux-mm(a)kvack.org
S: Maintained
F: mm/zbud.c
@@@ -12189,7 -12176,7 +12177,7 @@@ F: include/linux/zsmalloc.
F: Documentation/vm/zsmalloc.txt
ZSWAP COMPRESSED SWAP CACHING
- M: Seth Jennings <sjennings(a)variantweb.net>
+ M: Seth Jennings <sjenning(a)redhat.com>
L: linux-mm(a)kvack.org
S: Maintained
F: mm/zswap.c
diff --combined drivers/net/bonding/bond_main.c
index 45bdd87,b7f1a99..a6527d5
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@@ -214,6 -214,8 +214,8 @@@ static void bond_uninit(struct net_devi
static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats);
static void bond_slave_arr_handler(struct work_struct *work);
+ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+ int mod);
/*---------------------------- General routines -----------------------------*/
@@@ -618,8 -620,8 +620,8 @@@ static void bond_hw_addr_swap(struct bo
static void bond_set_dev_addr(struct net_device *bond_dev,
struct net_device *slave_dev)
{
- netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n",
- bond_dev, slave_dev, slave_dev->addr_len);
+ netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->name=%s slave_dev->addr_len=%d\n",
+ bond_dev, slave_dev, slave_dev->name, slave_dev->addr_len);
memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
bond_dev->addr_assign_type = NET_ADDR_STOLEN;
call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
@@@ -928,10 -930,11 +930,10 @@@ void bond_select_active_slave(struct bo
if (!rv)
return;
- if (netif_carrier_ok(bond->dev)) {
+ if (netif_carrier_ok(bond->dev))
netdev_info(bond->dev, "first active interface up!\n");
- } else {
+ else
netdev_info(bond->dev, "now running without any active interface!\n");
- }
}
}
@@@ -1177,8 -1180,9 +1179,8 @@@ static rx_handler_result_t bond_handle_
}
}
- if (bond_should_deliver_exact_match(skb, slave, bond)) {
+ if (bond_should_deliver_exact_match(skb, slave, bond))
return RX_HANDLER_EXACT;
- }
skb->dev = bond->dev;
@@@ -1239,7 -1243,7 +1241,7 @@@ static struct slave *bond_alloc_slave(s
{
struct slave *slave = NULL;
- slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
+ slave = kzalloc(sizeof(*slave), GFP_KERNEL);
if (!slave)
return NULL;
@@@ -1379,7 -1383,8 +1381,7 @@@ int bond_enslave(struct net_device *bon
if (slave_dev->flags & IFF_UP) {
netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
slave_dev->name);
- res = -EPERM;
- goto err_undo_flags;
+ return -EPERM;
}
/* set bonding device ether type by slave - bonding netdevices are
@@@ -1399,7 -1404,8 +1401,7 @@@
res = notifier_to_errno(res);
if (res) {
netdev_err(bond_dev, "refused to change device type\n");
- res = -EBUSY;
- goto err_undo_flags;
+ return -EBUSY;
}
/* Flush unicast and multicast addresses */
@@@ -1419,7 -1425,8 +1421,7 @@@
} else if (bond_dev->type != slave_dev->type) {
netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
slave_dev->name, slave_dev->type, bond_dev->type);
- res = -EINVAL;
- goto err_undo_flags;
+ return -EINVAL;
}
if (slave_ops->ndo_set_mac_address == NULL) {
@@@ -2122,6 -2129,7 +2124,7 @@@ static void bond_miimon_commit(struct b
continue;
case BOND_LINK_UP:
+ bond_update_speed_duplex(slave);
bond_set_slave_link_state(slave, BOND_LINK_UP,
BOND_SLAVE_NOTIFY_NOW);
slave->last_link_up = jiffies;
@@@ -2454,7 -2462,7 +2457,7 @@@ int bond_arp_rcv(const struct sk_buff *
struct slave *slave)
{
struct arphdr *arp = (struct arphdr *)skb->data;
- struct slave *curr_active_slave;
+ struct slave *curr_active_slave, *curr_arp_slave;
unsigned char *arp_ptr;
__be32 sip, tip;
int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@@ -2501,26 -2509,41 +2504,41 @@@
&sip, &tip);
curr_active_slave = rcu_dereference(bond->curr_active_slave);
+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
- /* Backup slaves won't see the ARP reply, but do come through
- * here for each ARP probe (so we swap the sip/tip to validate
- * the probe). In a "redundant switch, common router" type of
- * configuration, the ARP probe will (hopefully) travel from
- * the active, through one switch, the router, then the other
- * switch before reaching the backup.
+ /* We 'trust' the received ARP enough to validate it if:
*
- * We 'trust' the arp requests if there is an active slave and
- * it received valid arp reply(s) after it became active. This
- * is done to avoid endless looping when we can't reach the
+ * (a) the slave receiving the ARP is active (which includes the
+ * current ARP slave, if any), or
+ *
+ * (b) the receiving slave isn't active, but there is a currently
+ * active slave and it received valid arp reply(s) after it became
+ * the currently active slave, or
+ *
+ * (c) there is an ARP slave that sent an ARP during the prior ARP
+ * interval, and we receive an ARP reply on any slave. We accept
+ * these because switch FDB update delays may deliver the ARP
+ * reply to a slave other than the sender of the ARP request.
+ *
+ * Note: for (b), backup slaves are receiving the broadcast ARP
+ * request, not a reply. This request passes from the sending
+ * slave through the L2 switch(es) to the receiving slave. Since
+ * this is checking the request, sip/tip are swapped for
+ * validation.
+ *
+ * This is done to avoid endless looping when we can't reach the
* arp_ip_target and fool ourselves with our own arp requests.
*/
-
if (bond_is_active_slave(slave))
bond_validate_arp(bond, slave, sip, tip);
else if (curr_active_slave &&
time_after(slave_last_rx(bond, curr_active_slave),
curr_active_slave->last_link_up))
bond_validate_arp(bond, slave, tip, sip);
+ else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+ bond_time_in_interval(bond,
+ dev_trans_start(curr_arp_slave->dev), 1))
+ bond_validate_arp(bond, slave, sip, tip);
out_unlock:
if (arp != (struct arphdr *)skb->data)
@@@ -3304,7 -3327,6 +3322,7 @@@ static struct rtnl_link_stats64 *bond_g
stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes;
stats->rx_errors += sstats->rx_errors - pstats->rx_errors;
stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped;
+ stats->rx_nohandler += sstats->rx_nohandler - pstats->rx_nohandler;
stats->tx_packets += sstats->tx_packets - pstats->tx_packets;
stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes;
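The new acceptance rule (c) in bond_arp_rcv() above relies on bond_time_in_interval(), whose forward declaration this merge adds near the top of the file. Sketching its semantics from the call site (a simplified sketch; the real helper lives elsewhere in bond_main.c): it answers whether a timestamp falls within roughly the last (mod + 1) ARP intervals of now:
	/* Simplified sketch, assuming bond->params.arp_interval is in ms */
	static bool bond_time_in_interval(struct bonding *bond,
					  unsigned long last_act, int mod)
	{
		int delta = msecs_to_jiffies(bond->params.arp_interval);

		/* true if jiffies lies in [last_act - delta,
		 * last_act + mod * delta + delta / 2]
		 */
		return time_in_range(jiffies, last_act - delta,
				     last_act + mod * delta + delta / 2);
	}
So case (c) accepts an ARP reply on any slave only if the current ARP slave transmitted within the last ARP interval.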
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 169920a,8ab000d..ff1507f
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@@ -69,7 -69,7 +69,7 @@@ MODULE_VERSION(DRV_MODULE_VERSION)
#define BNXT_RX_DMA_OFFSET NET_SKB_PAD
#define BNXT_RX_COPY_THRESH 256
- #define BNXT_TX_PUSH_THRESH 92
+ #define BNXT_TX_PUSH_THRESH 164
enum board_idx {
BCM57301,
@@@ -223,11 -223,12 +223,12 @@@ static netdev_tx_t bnxt_start_xmit(stru
}
if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
- struct tx_push_bd *push = txr->tx_push;
- struct tx_bd *tx_push = &push->txbd1;
- struct tx_bd_ext *tx_push1 = &push->txbd2;
- void *pdata = tx_push1 + 1;
- int j;
+ struct tx_push_buffer *tx_push_buf = txr->tx_push;
+ struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
+ struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
+ void *pdata = tx_push_buf->data;
+ u64 *end;
+ int j, push_len;
/* Set COAL_NOW to be ready quickly for the next push */
tx_push->tx_bd_len_flags_type =
@@@ -247,6 -248,9 +248,9 @@@
tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+ end = PTR_ALIGN(pdata + length + 1, 8) - 1;
+ *end = 0;
+
skb_copy_from_linear_data(skb, pdata, len);
pdata += len;
for (j = 0; j < last_frag; j++) {
@@@ -261,22 -265,29 +265,29 @@@
pdata += skb_frag_size(frag);
}
- memcpy(txbd, tx_push, sizeof(*txbd));
+ txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type;
+ txbd->tx_bd_haddr = txr->data_mapping;
prod = NEXT_TX(prod);
txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
memcpy(txbd, tx_push1, sizeof(*txbd));
prod = NEXT_TX(prod);
- push->doorbell =
+ tx_push->doorbell =
cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
txr->tx_prod = prod;
netdev_tx_sent_queue(txq, skb->len);
- __iowrite64_copy(txr->tx_doorbell, push,
- (length + sizeof(*push) + 8) / 8);
+ push_len = (length + sizeof(*tx_push) + 7) / 8;
+ if (push_len > 16) {
+ __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
+ __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+ push_len - 16);
+ } else {
+ __iowrite64_copy(txr->tx_doorbell, tx_push_buf,
+ push_len);
+ }
tx_buf->is_push = 1;
-
goto tx_done;
}
@@@ -1753,7 -1764,7 +1764,7 @@@ static int bnxt_alloc_tx_rings(struct b
push_size = L1_CACHE_ALIGN(sizeof(struct tx_push_bd) +
bp->tx_push_thresh);
- if (push_size > 128) {
+ if (push_size > 256) {
push_size = 0;
bp->tx_push_thresh = 0;
}
@@@ -1772,7 -1783,6 +1783,6 @@@
return rc;
if (bp->tx_push_size) {
- struct tx_bd *txbd;
dma_addr_t mapping;
/* One pre-allocated DMA buffer to backup
@@@ -1786,13 -1796,11 +1796,11 @@@
if (!txr->tx_push)
return -ENOMEM;
- txbd = &txr->tx_push->txbd1;
-
mapping = txr->tx_push_mapping +
sizeof(struct tx_push_bd);
- txbd->tx_bd_haddr = cpu_to_le64(mapping);
+ txr->data_mapping = cpu_to_le64(mapping);
- memset(txbd + 1, 0, sizeof(struct tx_bd_ext));
+ memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
}
ring->queue_id = bp->q_info[j].queue_id;
if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@@ -4546,20 -4554,18 +4554,18 @@@ static int bnxt_update_phy_setting(stru
if (!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) &&
link_info->force_pause_setting != link_info->req_flow_ctrl)
update_pause = true;
- if (link_info->req_duplex != link_info->duplex_setting)
- update_link = true;
if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
if (BNXT_AUTO_MODE(link_info->auto_mode))
update_link = true;
if (link_info->req_link_speed != link_info->force_link_speed)
update_link = true;
+ if (link_info->req_duplex != link_info->duplex_setting)
+ update_link = true;
} else {
if (link_info->auto_mode == BNXT_LINK_AUTO_NONE)
update_link = true;
if (link_info->advertising != link_info->auto_link_speeds)
update_link = true;
- if (link_info->req_link_speed != link_info->auto_link_speed)
- update_link = true;
}
if (update_link)
@@@ -4636,7 -4642,7 +4642,7 @@@ static int __bnxt_open_nic(struct bnxt
if (link_re_init) {
rc = bnxt_update_phy_setting(bp);
if (rc)
- goto open_err;
+ netdev_warn(bp->dev, "failed to update phy settings\n");
}
if (irq_re_init) {
@@@ -4654,6 -4660,7 +4660,7 @@@
/* Enable TX queues */
bnxt_tx_enable(bp);
mod_timer(&bp->timer, jiffies + bp->current_interval);
+ bnxt_update_link(bp, true);
return 0;
@@@ -5370,16 -5377,9 +5377,16 @@@ static int bnxt_change_mtu(struct net_d
return 0;
}
-static int bnxt_setup_tc(struct net_device *dev, u8 tc)
+static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+ struct tc_to_netdev *ntc)
{
struct bnxt *bp = netdev_priv(dev);
+ u8 tc;
+
+ if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO)
+ return -EINVAL;
+
+ tc = ntc->tc;
if (tc > bp->max_tc) {
netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n",
@@@ -5677,22 -5677,16 +5684,16 @@@ static int bnxt_probe_phy(struct bnxt *
}
/* initialize the ethtool setting copy with NVM settings */
- if (BNXT_AUTO_MODE(link_info->auto_mode))
- link_info->autoneg |= BNXT_AUTONEG_SPEED;
-
- if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
- if (link_info->auto_pause_setting == BNXT_LINK_PAUSE_BOTH)
- link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+ if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+ link_info->autoneg = BNXT_AUTONEG_SPEED |
+ BNXT_AUTONEG_FLOW_CTRL;
+ link_info->advertising = link_info->auto_link_speeds;
link_info->req_flow_ctrl = link_info->auto_pause_setting;
- } else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+ } else {
+ link_info->req_link_speed = link_info->force_link_speed;
+ link_info->req_duplex = link_info->duplex_setting;
link_info->req_flow_ctrl = link_info->force_pause_setting;
}
- link_info->req_duplex = link_info->duplex_setting;
- if (link_info->autoneg & BNXT_AUTONEG_SPEED)
- link_info->req_link_speed = link_info->auto_link_speed;
- else
- link_info->req_link_speed = link_info->force_link_speed;
- link_info->advertising = link_info->auto_link_speeds;
snprintf(phy_ver, PHY_VER_STR_LEN, " ph %d.%d.%d",
link_info->phy_ver[0],
link_info->phy_ver[1],
diff --combined drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 90ce93e,a009bc3..bfee298
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@@ -574,8 -574,7 +574,7 @@@ static inline void nicvf_set_rxhash(str
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
struct napi_struct *napi,
- struct cmp_queue *cq,
- struct cqe_rx_t *cqe_rx, int cqe_type)
+ struct cqe_rx_t *cqe_rx)
{
struct sk_buff *skb;
struct nicvf *nic = netdev_priv(netdev);
@@@ -591,7 -590,7 +590,7 @@@
}
/* Check for errors */
- err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
+ err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
if (err && !cqe_rx->rb_cnt)
return;
@@@ -682,8 -681,7 +681,7 @@@ loop
cq_idx, cq_desc->cqe_type);
switch (cq_desc->cqe_type) {
case CQE_TYPE_RX:
- nicvf_rcv_pkt_handler(netdev, napi, cq,
- cq_desc, CQE_TYPE_RX);
+ nicvf_rcv_pkt_handler(netdev, napi, cq_desc);
work_done++;
break;
case CQE_TYPE_SEND:
@@@ -828,7 -826,7 +826,7 @@@ static irqreturn_t nicvf_intr_handler(i
nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
/* Schedule NAPI */
- napi_schedule(&cq_poll->napi);
+ napi_schedule_irqoff(&cq_poll->napi);
/* Clear interrupt */
nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
@@@ -899,31 -897,6 +897,31 @@@ static void nicvf_disable_msix(struct n
}
}
+static void nicvf_set_irq_affinity(struct nicvf *nic)
+{
+ int vec, cpu;
+ int irqnum;
+
+ for (vec = 0; vec < nic->num_vec; vec++) {
+ if (!nic->irq_allocated[vec])
+ continue;
+
+ if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL))
+ return;
+ /* CQ interrupts */
+ if (vec < NICVF_INTR_ID_SQ)
+ /* Leave CPU0 for RBDR and other interrupts */
+ cpu = nicvf_netdev_qidx(nic, vec) + 1;
+ else
+ cpu = 0;
+
+ cpumask_set_cpu(cpumask_local_spread(cpu, nic->node),
+ nic->affinity_mask[vec]);
+ irqnum = nic->msix_entries[vec].vector;
+ irq_set_affinity_hint(irqnum, nic->affinity_mask[vec]);
+ }
+}
+
static int nicvf_register_interrupts(struct nicvf *nic)
{
int irq, ret = 0;
@@@ -969,13 -942,8 +967,13 @@@
ret = request_irq(nic->msix_entries[irq].vector,
nicvf_qs_err_intr_handler,
0, nic->irq_name[irq], nic);
- if (!ret)
- nic->irq_allocated[irq] = true;
+ if (ret)
+ goto err;
+
+ nic->irq_allocated[irq] = true;
+
+ /* Set IRQ affinities */
+ nicvf_set_irq_affinity(nic);
err:
if (ret)
@@@ -993,9 -961,6 +991,9 @@@ static void nicvf_unregister_interrupts
if (!nic->irq_allocated[irq])
continue;
+ irq_set_affinity_hint(nic->msix_entries[irq].vector, NULL);
+ free_cpumask_var(nic->affinity_mask[irq]);
+
if (irq < NICVF_INTR_ID_SQ)
free_irq(nic->msix_entries[irq].vector, nic->napi[irq]);
else
@@@ -1158,7 -1123,6 +1156,6 @@@ int nicvf_stop(struct net_device *netde
/* Clear multiqset info */
nic->pnicvf = nic;
- nic->sqs_count = 0;
return 0;
}
@@@ -1387,6 -1351,9 +1384,9 @@@ void nicvf_update_stats(struct nicvf *n
drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
stats->tx_bcast_frames_ok +
stats->tx_mcast_frames_ok;
+ drv_stats->rx_frames_ok = stats->rx_ucast_frames +
+ stats->rx_bcast_frames +
+ stats->rx_mcast_frames;
drv_stats->rx_drops = stats->rx_drop_red +
stats->rx_drop_overrun;
drv_stats->tx_drops = stats->tx_drops;
@@@ -1427,7 -1394,6 +1427,7 @@@ static void nicvf_tx_timeout(struct net
netdev_warn(dev, "%s: Transmit timed out, resetting\n",
dev->name);
+ nic->drv_stats.tx_timeout++;
schedule_work(&nic->reset_task);
}
@@@ -1572,6 -1538,9 +1572,9 @@@ static int nicvf_probe(struct pci_dev *
nicvf_send_vf_struct(nic);
+ if (!pass1_silicon(nic->pdev))
+ nic->hw_tso = true;
+
/* Check if this VF is in QS only mode */
if (nic->sqs_mode)
return 0;
@@@ -1591,9 -1560,6 +1594,6 @@@
netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
- if (!pass1_silicon(nic->pdev))
- nic->hw_tso = true;
-
netdev->netdev_ops = &nicvf_netdev_ops;
netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
diff --combined drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 5adb208,767347b..0dd1abf
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@@ -78,7 -78,7 +78,7 @@@ static void nicvf_free_q_desc_mem(struc
static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
u32 buf_len, u64 **rbuf)
{
- int order = get_order(buf_len);
+ int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0;
/* Check if request can be accommodated in previously allocated page */
if (nic->rb_page) {
@@@ -96,7 -96,8 +96,7 @@@
nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
order);
if (!nic->rb_page) {
- netdev_err(nic->netdev,
- "Failed to allocate new rcv buffer\n");
+ nic->drv_stats.rcv_buffer_alloc_failures++;
return -ENOMEM;
}
nic->rb_page_offset = 0;
@@@ -1328,16 -1329,12 +1328,12 @@@ void nicvf_update_sq_stats(struct nicv
}
/* Check for errors in the receive cmp.queue entry */
- int nicvf_check_cqe_rx_errs(struct nicvf *nic,
- struct cmp_queue *cq, struct cqe_rx_t *cqe_rx)
+ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
struct nicvf_hw_stats *stats = &nic->hw_stats;
- struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
- if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
- drv_stats->rx_frames_ok++;
+ if (!cqe_rx->err_level && !cqe_rx->err_opcode)
return 0;
- }
if (netif_msg_rx_err(nic))
netdev_err(nic->netdev,
diff --combined drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 01d6a96,f191a16..96d95cb
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@@ -69,15 -69,6 +69,15 @@@ int mlx4_en_setup_tc(struct net_device
return 0;
}
+static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+ struct tc_to_netdev *tc)
+{
+ if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+ return -EINVAL;
+
+ return mlx4_en_setup_tc(dev, tc->tc);
+}
+
#ifdef CONFIG_RFS_ACCEL
struct mlx4_en_filter {
@@@ -2353,8 -2344,6 +2353,6 @@@ out
/* set offloads */
priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
- priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
- priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL;
}
static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
@@@ -2365,8 -2354,6 +2363,6 @@@
/* unset offloads */
priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
- priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
- priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
VXLAN_STEER_BY_OUTER_MAC, 0);
@@@ -2475,7 -2462,7 +2471,7 @@@ static const struct net_device_ops mlx4
#endif
.ndo_set_features = mlx4_en_set_features,
.ndo_fix_features = mlx4_en_fix_features,
- .ndo_setup_tc = mlx4_en_setup_tc,
+ .ndo_setup_tc = __mlx4_en_setup_tc,
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx4_en_filter_rfs,
#endif
@@@ -2513,7 -2500,7 +2509,7 @@@ static const struct net_device_ops mlx4
#endif
.ndo_set_features = mlx4_en_set_features,
.ndo_fix_features = mlx4_en_fix_features,
- .ndo_setup_tc = mlx4_en_setup_tc,
+ .ndo_setup_tc = __mlx4_en_setup_tc,
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx4_en_filter_rfs,
#endif
@@@ -2989,6 -2976,11 +2985,11 @@@ int mlx4_en_init_netdev(struct mlx4_en_
priv->rss_hash_fn = ETH_RSS_HASH_TOP;
}
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+ dev->features |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+
mdev->pndev[port] = dev;
mdev->upper[port] = NULL;
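The same API migration appears in bnxt, mlx4 and netcp within this merge: ndo_setup_tc() now takes a (handle, protocol, struct tc_to_netdev *) tuple instead of a bare u8 traffic-class count, so each driver grows a thin wrapper that validates the offload type before delegating to the old entry point. The common shape, condensed from the mlx4 hunk above:
	static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
				      __be16 proto, struct tc_to_netdev *tc)
	{
		/* Only root-qdisc MQPRIO offload requests are handled */
		if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
			return -EINVAL;

		return mlx4_en_setup_tc(dev, tc->tc);
	}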
diff --combined drivers/net/ethernet/renesas/ravb_main.c
index 331c596,744d780..88656ce
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@@ -2,7 -2,7 +2,7 @@@
*
* Copyright (C) 2014-2015 Renesas Electronics Corporation
* Copyright (C) 2015 Renesas Solutions Corp.
- * Copyright (C) 2015 Cogent Embedded, Inc. <source(a)cogentembedded.com>
+ * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source(a)cogentembedded.com>
*
* Based on the SuperH Ethernet driver
*
@@@ -42,12 -42,6 +42,12 @@@
NETIF_MSG_RX_ERR | \
NETIF_MSG_TX_ERR)
+void ravb_modify(struct net_device *ndev, enum ravb_reg reg, u32 clear,
+ u32 set)
+{
+ ravb_write(ndev, (ravb_read(ndev, reg) & ~clear) | set, reg);
+}
+
int ravb_wait(struct net_device *ndev, enum ravb_reg reg, u32 mask, u32 value)
{
int i;
@@@ -65,7 -59,8 +65,7 @@@ static int ravb_config(struct net_devic
int error;
/* Set config mode */
- ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG,
- CCC);
+ ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
/* Check if the operating mode is changed to the config mode */
error = ravb_wait(ndev, CSR, CSR_OPS, CSR_OPS_CONFIG);
if (error)
@@@ -77,8 -72,13 +77,8 @@@
static void ravb_set_duplex(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
- u32 ecmr = ravb_read(ndev, ECMR);
- if (priv->duplex) /* Full */
- ecmr |= ECMR_DM;
- else /* Half */
- ecmr &= ~ECMR_DM;
- ravb_write(ndev, ecmr, ECMR);
+ ravb_modify(ndev, ECMR, ECMR_DM, priv->duplex ? ECMR_DM : 0);
}
static void ravb_set_rate(struct net_device *ndev)
@@@ -92,6 -92,8 +92,6 @@@
case 1000: /* 1000BASE */
ravb_write(ndev, GECMR_SPEED_1000, GECMR);
break;
- default:
- break;
}
}
@@@ -129,8 -131,13 +129,8 @@@ static void ravb_mdio_ctrl(struct mdiob
{
struct ravb_private *priv = container_of(ctrl, struct ravb_private,
mdiobb);
- u32 pir = ravb_read(priv->ndev, PIR);
- if (set)
- pir |= mask;
- else
- pir &= ~mask;
- ravb_write(priv->ndev, pir, PIR);
+ ravb_modify(priv->ndev, PIR, mask, set ? mask : 0);
}
/* MDC pin control */
@@@ -386,9 -393,9 +386,9 @@@ static int ravb_dmac_init(struct net_de
ravb_ring_format(ndev, RAVB_NC);
#if defined(__LITTLE_ENDIAN)
- ravb_write(ndev, ravb_read(ndev, CCC) & ~CCC_BOC, CCC);
+ ravb_modify(ndev, CCC, CCC_BOC, 0);
#else
- ravb_write(ndev, ravb_read(ndev, CCC) | CCC_BOC, CCC);
+ ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC);
#endif
/* Set AVB RX */
@@@ -411,7 -418,8 +411,7 @@@
ravb_write(ndev, TIC_FTE0 | TIC_FTE1 | TIC_TFUE, TIC);
/* Setting the control will start the AVB-DMAC process. */
- ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_OPERATION,
- CCC);
+ ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION);
return 0;
}
@@@ -485,7 -493,7 +485,7 @@@ static void ravb_get_tx_tstamp(struct n
break;
}
}
- ravb_write(ndev, ravb_read(ndev, TCCR) | TCCR_TFR, TCCR);
+ ravb_modify(ndev, TCCR, TCCR_TFR, TCCR_TFR);
}
}
@@@ -605,13 -613,13 +605,13 @@@ static bool ravb_rx(struct net_device *
static void ravb_rcv_snd_disable(struct net_device *ndev)
{
/* Disable TX and RX */
- ravb_write(ndev, ravb_read(ndev, ECMR) & ~(ECMR_RE | ECMR_TE), ECMR);
+ ravb_modify(ndev, ECMR, ECMR_RE | ECMR_TE, 0);
}
static void ravb_rcv_snd_enable(struct net_device *ndev)
{
/* Enable TX and RX */
- ravb_write(ndev, ravb_read(ndev, ECMR) | ECMR_RE | ECMR_TE, ECMR);
+ ravb_modify(ndev, ECMR, ECMR_RE | ECMR_TE, ECMR_RE | ECMR_TE);
}
/* function for waiting until the DMA process has finished */
@@@ -804,8 -812,8 +804,8 @@@ static int ravb_poll(struct napi_struc
/* Re-enable RX/TX interrupts */
spin_lock_irqsave(&priv->lock, flags);
- ravb_write(ndev, ravb_read(ndev, RIC0) | mask, RIC0);
- ravb_write(ndev, ravb_read(ndev, TIC) | mask, TIC);
+ ravb_modify(ndev, RIC0, mask, mask);
+ ravb_modify(ndev, TIC, mask, mask);
mmiowb();
spin_unlock_irqrestore(&priv->lock, flags);
@@@ -844,7 -852,8 +844,7 @@@ static void ravb_adjust_link(struct net
ravb_set_rate(ndev);
}
if (!priv->link) {
- ravb_write(ndev, ravb_read(ndev, ECMR) & ~ECMR_TXF,
- ECMR);
+ ravb_modify(ndev, ECMR, ECMR_TXF, 0);
new_state = true;
priv->link = phydev->link;
if (priv->no_avb_link)
@@@ -1130,7 -1139,8 +1130,8 @@@ static int ravb_set_ringparam(struct ne
if (netif_running(ndev)) {
netif_device_detach(ndev);
/* Stop PTP Clock driver */
- ravb_ptp_stop(ndev);
+ if (priv->chip_id == RCAR_GEN2)
+ ravb_ptp_stop(ndev);
/* Wait for DMA stopping */
error = ravb_stop_dma(ndev);
if (error) {
@@@ -1161,7 -1171,8 +1162,8 @@@
ravb_emac_init(ndev);
/* Initialise PTP Clock driver */
- ravb_ptp_init(ndev, priv->pdev);
+ if (priv->chip_id == RCAR_GEN2)
+ ravb_ptp_init(ndev, priv->pdev);
netif_device_attach(ndev);
}
@@@ -1289,7 -1300,8 +1291,8 @@@ static void ravb_tx_timeout_work(struc
netif_tx_stop_all_queues(ndev);
/* Stop PTP Clock driver */
- ravb_ptp_stop(ndev);
+ if (priv->chip_id == RCAR_GEN2)
+ ravb_ptp_stop(ndev);
/* Wait for DMA stopping */
ravb_stop_dma(ndev);
@@@ -1302,7 -1314,8 +1305,8 @@@
ravb_emac_init(ndev);
/* Initialise PTP Clock driver */
- ravb_ptp_init(ndev, priv->pdev);
+ if (priv->chip_id == RCAR_GEN2)
+ ravb_ptp_init(ndev, priv->pdev);
netif_tx_start_all_queues(ndev);
}
@@@ -1384,7 -1397,7 +1388,7 @@@ static netdev_tx_t ravb_start_xmit(stru
desc--;
desc->die_dt = DT_FSTART;
- ravb_write(ndev, ravb_read(ndev, TCCR) | (TCCR_TSRQ0 << q), TCCR);
+ ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q);
priv->cur_tx[q] += NUM_TX_DESC;
if (priv->cur_tx[q] - priv->dirty_tx[q] >
@@@ -1459,10 -1472,15 +1463,10 @@@ static void ravb_set_rx_mode(struct net
{
struct ravb_private *priv = netdev_priv(ndev);
unsigned long flags;
- u32 ecmr;
spin_lock_irqsave(&priv->lock, flags);
- ecmr = ravb_read(ndev, ECMR);
- if (ndev->flags & IFF_PROMISC)
- ecmr |= ECMR_PRM;
- else
- ecmr &= ~ECMR_PRM;
- ravb_write(ndev, ecmr, ECMR);
+ ravb_modify(ndev, ECMR, ECMR_PRM,
+ ndev->flags & IFF_PROMISC ? ECMR_PRM : 0);
mmiowb();
spin_unlock_irqrestore(&priv->lock, flags);
}
@@@ -1790,25 -1808,23 +1794,21 @@@ static int ravb_probe(struct platform_d
/* Set AVB config mode */
if (chip_id == RCAR_GEN2) {
- ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) |
- CCC_OPC_CONFIG, CCC);
+ ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
/* Set CSEL value */
- ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) |
- CCC_CSEL_HPB, CCC);
+ ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
} else {
- ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) |
- CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC);
+ ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
+ CCC_GAC | CCC_CSEL_HPB);
}
- /* Set CSEL value */
- ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB,
- CCC);
-
/* Set GTI value */
error = ravb_set_gti(ndev);
if (error)
goto out_release;
/* Request GTI loading */
- ravb_write(ndev, ravb_read(ndev, GCCR) | GCCR_LTI, GCCR);
+ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
/* Allocate descriptor base address table */
priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM;
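Nearly every hunk in this file is the same mechanical conversion: open-coded read-modify-write sequences on device registers become calls to the ravb_modify() helper introduced at the top of the file. The shape of the conversion, taken from the config-mode hunk above:
	/* Before: read, mask out the field, OR in the new value, write back */
	ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG, CCC);

	/* After: clear the CCC_OPC field and set CCC_OPC_CONFIG in one call */
	ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);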
diff --combined drivers/net/ethernet/ti/netcp_core.c
index 06a0a73,029841f..ed0c30f
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@@ -117,21 -117,17 +117,17 @@@ static void get_pkt_info(dma_addr_t *bu
*ndesc = le32_to_cpu(desc->next_desc);
}
- static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc)
+ static u32 get_sw_data(int index, struct knav_dma_desc *desc)
{
- *pad0 = le32_to_cpu(desc->pad[0]);
- *pad1 = le32_to_cpu(desc->pad[1]);
- *pad2 = le32_to_cpu(desc->pad[2]);
+ /* No Endian conversion needed as this data is untouched by hw */
+ return desc->sw_data[index];
}
- static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc)
- {
- u64 pad64;
-
- pad64 = le32_to_cpu(desc->pad[0]) +
- ((u64)le32_to_cpu(desc->pad[1]) << 32);
- *padptr = (void *)(uintptr_t)pad64;
- }
+ /* use these macros to get sw data */
+ #define GET_SW_DATA0(desc) get_sw_data(0, desc)
+ #define GET_SW_DATA1(desc) get_sw_data(1, desc)
+ #define GET_SW_DATA2(desc) get_sw_data(2, desc)
+ #define GET_SW_DATA3(desc) get_sw_data(3, desc)
static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len,
struct knav_dma_desc *desc)
@@@ -163,13 -159,18 +159,18 @@@ static void set_desc_info(u32 desc_info
desc->packet_info = cpu_to_le32(pkt_info);
}
- static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc)
+ static void set_sw_data(int index, u32 data, struct knav_dma_desc *desc)
{
- desc->pad[0] = cpu_to_le32(pad0);
- desc->pad[1] = cpu_to_le32(pad1);
- desc->pad[2] = cpu_to_le32(pad1);
+ /* No Endian conversion needed as this data is untouched by hw */
+ desc->sw_data[index] = data;
}
+ /* use these macros to set sw data */
+ #define SET_SW_DATA0(data, desc) set_sw_data(0, data, desc)
+ #define SET_SW_DATA1(data, desc) set_sw_data(1, data, desc)
+ #define SET_SW_DATA2(data, desc) set_sw_data(2, data, desc)
+ #define SET_SW_DATA3(data, desc) set_sw_data(3, data, desc)
+
static void set_org_pkt_info(dma_addr_t buff, u32 buff_len,
struct knav_dma_desc *desc)
{
@@@ -581,7 -582,6 +582,6 @@@ static void netcp_free_rx_desc_chain(st
dma_addr_t dma_desc, dma_buf;
unsigned int buf_len, dma_sz = sizeof(*ndesc);
void *buf_ptr;
- u32 pad[2];
u32 tmp;
get_words(&dma_desc, 1, &desc->next_desc);
@@@ -593,14 -593,20 +593,20 @@@
break;
}
get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc);
- get_pad_ptr(&buf_ptr, ndesc);
+ /* warning!!!! We are retrieving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ buf_ptr = (void *)GET_SW_DATA0(ndesc);
+ buf_len = (int)GET_SW_DATA1(desc);
dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE);
__free_page(buf_ptr);
knav_pool_desc_put(netcp->rx_pool, desc);
}
-
- get_pad_info(&pad[0], &pad[1], &buf_len, desc);
- buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+ /* warning!!!! We are retrieving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ buf_ptr = (void *)GET_SW_DATA0(desc);
+ buf_len = (int)GET_SW_DATA1(desc);
if (buf_ptr)
netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr);
@@@ -639,7 -645,6 +645,6 @@@ static int netcp_process_one_rx_packet(
dma_addr_t dma_desc, dma_buff;
struct netcp_packet p_info;
struct sk_buff *skb;
- u32 pad[2];
void *org_buf_ptr;
dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
@@@ -653,8 -658,11 +658,11 @@@
}
get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc);
- get_pad_info(&pad[0], &pad[1], &org_buf_len, desc);
- org_buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+ /* warning!!!! We are retrieving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ org_buf_ptr = (void *)GET_SW_DATA0(desc);
+ org_buf_len = (int)GET_SW_DATA1(desc);
if (unlikely(!org_buf_ptr)) {
dev_err(netcp->ndev_dev, "NULL bufptr in desc\n");
@@@ -679,7 -687,6 +687,6 @@@
/* Fill in the page fragment list */
while (dma_desc) {
struct page *page;
- void *ptr;
ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
if (unlikely(!ndesc)) {
@@@ -688,8 -695,10 +695,10 @@@
}
get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc);
- get_pad_ptr(&ptr, ndesc);
- page = ptr;
+ /* warning!!!! We are retrieving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ page = (struct page *)GET_SW_DATA0(desc);
if (likely(dma_buff && buf_len && page)) {
dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
@@@ -777,7 -786,10 +786,10 @@@ static void netcp_free_rx_buf(struct ne
}
get_org_pkt_info(&dma, &buf_len, desc);
- get_pad_ptr(&buf_ptr, desc);
+ /* warning!!!! We are retrieving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ buf_ptr = (void *)GET_SW_DATA0(desc);
if (unlikely(!dma)) {
dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n");
@@@ -829,7 -841,7 +841,7 @@@ static int netcp_allocate_rx_buf(struc
struct page *page;
dma_addr_t dma;
void *bufptr;
- u32 pad[3];
+ u32 sw_data[2];
/* Allocate descriptor */
hwdesc = knav_pool_desc_get(netcp->rx_pool);
@@@ -846,7 -858,7 +858,7 @@@
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
bufptr = netdev_alloc_frag(primary_buf_len);
- pad[2] = primary_buf_len;
+ sw_data[1] = primary_buf_len;
if (unlikely(!bufptr)) {
dev_warn_ratelimited(netcp->ndev_dev,
@@@ -858,9 -870,10 +870,10 @@@
if (unlikely(dma_mapping_error(netcp->dev, dma)))
goto fail;
- pad[0] = lower_32_bits((uintptr_t)bufptr);
- pad[1] = upper_32_bits((uintptr_t)bufptr);
-
+ /* warning!!!! We are saving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ sw_data[0] = (u32)bufptr;
} else {
/* Allocate a secondary receive queue entry */
page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
@@@ -870,9 -883,11 +883,11 @@@
}
buf_len = PAGE_SIZE;
dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE);
- pad[0] = lower_32_bits(dma);
- pad[1] = upper_32_bits(dma);
- pad[2] = 0;
+ /* warning!!!! We are saving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ sw_data[0] = (u32)page;
+ sw_data[1] = 0;
}
desc_info = KNAV_DMA_DESC_PS_INFO_IN_DESC;
@@@ -882,7 -897,8 +897,8 @@@
pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) <<
KNAV_DMA_DESC_RETQ_SHIFT;
set_org_pkt_info(dma, buf_len, hwdesc);
- set_pad_info(pad[0], pad[1], pad[2], hwdesc);
+ SET_SW_DATA0(sw_data[0], hwdesc);
+ SET_SW_DATA1(sw_data[1], hwdesc);
set_desc_info(desc_info, pkt_info, hwdesc);
/* Push to FDQs */
@@@ -971,7 -987,6 +987,6 @@@ static int netcp_process_tx_compl_packe
unsigned int budget)
{
struct knav_dma_desc *desc;
- void *ptr;
struct sk_buff *skb;
unsigned int dma_sz;
dma_addr_t dma;
@@@ -988,8 -1003,10 +1003,10 @@@
continue;
}
- get_pad_ptr(&ptr, desc);
- skb = ptr;
+ /* warning!!!! We are retrieving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ skb = (struct sk_buff *)GET_SW_DATA0(desc);
netcp_free_tx_desc_chain(netcp, desc, dma_sz);
if (!skb) {
dev_err(netcp->ndev_dev, "No skb in Tx desc\n");
@@@ -1194,10 -1211,10 +1211,10 @@@ static int netcp_tx_submit_skb(struct n
}
set_words(&tmp, 1, &desc->packet_info);
- tmp = lower_32_bits((uintptr_t)&skb);
- set_words(&tmp, 1, &desc->pad[0]);
- tmp = upper_32_bits((uintptr_t)&skb);
- set_words(&tmp, 1, &desc->pad[1]);
+ /* warning!!!! We are saving the virtual ptr in the sw_data
+ * field as a 32bit value. Will not work on 64bit machines
+ */
+ SET_SW_DATA0((u32)skb, desc);
if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) {
tmp = tx_pipe->switch_to_port;
@@@ -1835,26 -1852,22 +1852,26 @@@ static u16 netcp_select_queue(struct ne
return 0;
}
-static int netcp_setup_tc(struct net_device *dev, u8 num_tc)
+static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+ struct tc_to_netdev *tc)
{
int i;
/* setup tc must be called under rtnl lock */
ASSERT_RTNL();
+ if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+ return -EINVAL;
+
/* Sanity-check the number of traffic classes requested */
if ((dev->real_num_tx_queues <= 1) ||
- (dev->real_num_tx_queues < num_tc))
+ (dev->real_num_tx_queues < tc->tc))
return -EINVAL;
/* Configure traffic class to queue mappings */
- if (num_tc) {
- netdev_set_num_tc(dev, num_tc);
- for (i = 0; i < num_tc; i++)
+ if (tc->tc) {
+ netdev_set_num_tc(dev, tc->tc);
+ for (i = 0; i < tc->tc; i++)
netdev_set_tc_queue(dev, i, 1, i);
} else {
netdev_reset_tc(dev);
diff --combined drivers/net/geneve.c
index 8fa8388,0bf7edd..bc5da35
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@@ -72,11 -72,10 +72,11 @@@ struct geneve_dev
bool collect_md;
struct gro_cells gro_cells;
u32 flags;
+ struct dst_cache dst_cache;
};
/* Geneve device flags */
-#define GENEVE_F_UDP_CSUM BIT(0)
+#define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0)
#define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1)
#define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2)
@@@ -110,11 -109,6 +110,11 @@@ static __be64 vni_to_tunnel_id(const __
#endif
}
+static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
+{
+ return gs->sock->sk->sk_family;
+}
+
static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
__be32 addr, u8 vni[])
{
@@@ -158,60 -152,58 +158,60 @@@ static inline struct genevehdr *geneve_
return (struct genevehdr *)(udp_hdr(skb) + 1);
}
-/* geneve receive/decap routine */
-static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
+static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
+ struct sk_buff *skb)
{
- struct genevehdr *gnvh = geneve_hdr(skb);
- struct metadata_dst *tun_dst = NULL;
- struct geneve_dev *geneve = NULL;
- struct pcpu_sw_netstats *stats;
- struct iphdr *iph = NULL;
+ u8 *vni;
__be32 addr;
static u8 zero_vni[3];
- u8 *vni;
- int err = 0;
- sa_family_t sa_family;
#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6hdr *ip6h = NULL;
- struct in6_addr addr6;
static struct in6_addr zero_addr6;
#endif
- sa_family = gs->sock->sk->sk_family;
+ if (geneve_get_sk_family(gs) == AF_INET) {
+ struct iphdr *iph;
- if (sa_family == AF_INET) {
iph = ip_hdr(skb); /* outer IP header... */
if (gs->collect_md) {
vni = zero_vni;
addr = 0;
} else {
- vni = gnvh->vni;
-
+ vni = geneve_hdr(skb)->vni;
addr = iph->saddr;
}
- geneve = geneve_lookup(gs, addr, vni);
+ return geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
- } else if (sa_family == AF_INET6) {
+ } else if (geneve_get_sk_family(gs) == AF_INET6) {
+ struct ipv6hdr *ip6h;
+ struct in6_addr addr6;
+
ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
if (gs->collect_md) {
vni = zero_vni;
addr6 = zero_addr6;
} else {
- vni = gnvh->vni;
-
+ vni = geneve_hdr(skb)->vni;
addr6 = ip6h->saddr;
}
- geneve = geneve6_lookup(gs, addr6, vni);
+ return geneve6_lookup(gs, addr6, vni);
#endif
}
- if (!geneve)
- goto drop;
+ return NULL;
+}
+
+/* geneve receive/decap routine */
+static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
+ struct sk_buff *skb)
+{
+ struct genevehdr *gnvh = geneve_hdr(skb);
+ struct metadata_dst *tun_dst = NULL;
+ struct pcpu_sw_netstats *stats;
+ int err = 0;
+ void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) {
__be16 flags;
@@@ -220,7 -212,7 +220,7 @@@
(gnvh->oam ? TUNNEL_OAM : 0) |
(gnvh->critical ? TUNNEL_CRIT_OPT : 0);
- tun_dst = udp_tun_rx_dst(skb, sa_family, flags,
+ tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
vni_to_tunnel_id(gnvh->vni),
gnvh->opt_len * 4);
if (!tun_dst)
@@@ -237,6 -229,7 +237,6 @@@
}
skb_reset_mac_header(skb);
- skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
skb->protocol = eth_type_trans(skb, geneve->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@@ -247,27 -240,25 +247,27 @@@
if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
goto drop;
+ oiph = skb_network_header(skb);
skb_reset_network_header(skb);
- if (iph)
- err = IP_ECN_decapsulate(iph, skb);
+ if (geneve_get_sk_family(gs) == AF_INET)
+ err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
- if (ip6h)
- err = IP6_ECN_decapsulate(ip6h, skb);
+ else
+ err = IP6_ECN_decapsulate(oiph, skb);
#endif
if (unlikely(err)) {
if (log_ecn_error) {
- if (iph)
+ if (geneve_get_sk_family(gs) == AF_INET)
net_info_ratelimited("non-ECT from %pI4 "
"with TOS=%#x\n",
- &iph->saddr, iph->tos);
+ &((struct iphdr *)oiph)->saddr,
+ ((struct iphdr *)oiph)->tos);
#if IS_ENABLED(CONFIG_IPV6)
- if (ip6h)
+ else
net_info_ratelimited("non-ECT from %pI6\n",
- &ip6h->saddr);
+ &((struct ipv6hdr *)oiph)->saddr);
#endif
}
if (err > 1) {
@@@ -306,13 -297,6 +306,13 @@@ static int geneve_init(struct net_devic
return err;
}
+ err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
+ if (err) {
+ free_percpu(dev->tstats);
+ gro_cells_destroy(&geneve->gro_cells);
+ return err;
+ }
+
return 0;
}
@@@ -320,7 -304,6 +320,7 @@@ static void geneve_uninit(struct net_de
{
struct geneve_dev *geneve = netdev_priv(dev);
+ dst_cache_destroy(&geneve->dst_cache);
gro_cells_destroy(&geneve->gro_cells);
free_percpu(dev->tstats);
}
@@@ -329,7 -312,6 +329,7 @@@
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct genevehdr *geneveh;
+ struct geneve_dev *geneve;
struct geneve_sock *gs;
int opts_len;
@@@ -345,21 -327,16 +345,21 @@@
if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
goto error;
- opts_len = geneveh->opt_len * 4;
- if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
- htons(ETH_P_TEB)))
- goto drop;
-
gs = rcu_dereference_sk_user_data(sk);
if (!gs)
goto drop;
- geneve_rx(gs, skb);
+ geneve = geneve_lookup_skb(gs, skb);
+ if (!geneve)
+ goto drop;
+
+ opts_len = geneveh->opt_len * 4;
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+ htons(ETH_P_TEB),
+ !net_eq(geneve->net, dev_net(geneve->dev))))
+ goto drop;
+
+ geneve_rx(geneve, gs, skb);
return 0;
drop:
@@@ -406,7 -383,7 +406,7 @@@ static void geneve_notify_add_rx_port(s
struct net_device *dev;
struct sock *sk = gs->sock->sk;
struct net *net = sock_net(sk);
- sa_family_t sa_family = sk->sk_family;
+ sa_family_t sa_family = geneve_get_sk_family(gs);
__be16 port = inet_sk(sk)->inet_sport;
int err;
@@@ -567,7 -544,7 +567,7 @@@ static void geneve_notify_del_rx_port(s
struct net_device *dev;
struct sock *sk = gs->sock->sk;
struct net *net = sock_net(sk);
- sa_family_t sa_family = sk->sk_family;
+ sa_family_t sa_family = geneve_get_sk_family(gs);
__be16 port = inet_sk(sk)->inet_sport;
rcu_read_lock();
@@@ -610,7 -587,7 +610,7 @@@ static struct geneve_sock *geneve_find_
list_for_each_entry(gs, &gn->sock_list, list) {
if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
- inet_sk(gs->sock->sk)->sk.sk_family == family) {
+ geneve_get_sk_family(gs) == family) {
return gs;
}
}
@@@ -703,7 -680,7 +703,7 @@@ static int geneve_build_skb(struct rtab
struct genevehdr *gnvh;
int min_headroom;
int err;
- bool udp_sum = !!(flags & GENEVE_F_UDP_CSUM);
+ bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX);
skb_scrub_packet(skb, xnet);
@@@ -776,9 -753,7 +776,9 @@@ static struct rtable *geneve_get_v4_rt(
struct ip_tunnel_info *info)
{
struct geneve_dev *geneve = netdev_priv(dev);
+ struct dst_cache *dst_cache;
struct rtable *rt = NULL;
+ bool use_cache = true;
__u8 tos;
memset(fl4, 0, sizeof(*fl4));
@@@ -789,26 -764,16 +789,26 @@@
fl4->daddr = info->key.u.ipv4.dst;
fl4->saddr = info->key.u.ipv4.src;
fl4->flowi4_tos = RT_TOS(info->key.tos);
+ dst_cache = &info->dst_cache;
} else {
tos = geneve->tos;
if (tos == 1) {
const struct iphdr *iip = ip_hdr(skb);
tos = ip_tunnel_get_dsfield(iip, skb);
+ use_cache = false;
}
fl4->flowi4_tos = RT_TOS(tos);
fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
+ dst_cache = &geneve->dst_cache;
+ }
+
+ use_cache = use_cache && !skb->mark;
+ if (use_cache) {
+ rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
+ if (rt)
+ return rt;
}
rt = ip_route_output_key(geneve->net, fl4);
@@@ -821,8 -786,6 +821,8 @@@
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
+ if (use_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
return rt;
}
@@@ -835,8 -798,6 +835,8 @@@ static struct dst_entry *geneve_get_v6_
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
+ struct dst_cache *dst_cache;
+ bool use_cache = true;
__u8 prio;
memset(fl6, 0, sizeof(*fl6));
@@@ -847,26 -808,16 +847,26 @@@
fl6->daddr = info->key.u.ipv6.dst;
fl6->saddr = info->key.u.ipv6.src;
fl6->flowi6_tos = RT_TOS(info->key.tos);
+ dst_cache = &info->dst_cache;
} else {
prio = geneve->tos;
if (prio == 1) {
const struct iphdr *iip = ip_hdr(skb);
prio = ip_tunnel_get_dsfield(iip, skb);
+ use_cache = false;
}
fl6->flowi6_tos = RT_TOS(prio);
fl6->daddr = geneve->remote.sin6.sin6_addr;
+ dst_cache = &geneve->dst_cache;
+ }
+
+ use_cache = use_cache && !skb->mark;
+ if (use_cache) {
+ dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
+ if (dst)
+ return dst;
}
if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
@@@ -879,8 -830,6 +879,8 @@@
return ERR_PTR(-ELOOP);
}
+ if (use_cache)
+ dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
return dst;
}
#endif
@@@ -944,9 -893,9 +944,9 @@@ static netdev_tx_t geneve_xmit_skb(stru
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
- flags |= GENEVE_F_UDP_CSUM;
+ flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX;
else
- flags &= ~GENEVE_F_UDP_CSUM;
+ flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
err = geneve_build_skb(rt, skb, key->tun_flags, vni,
info->options_len, opts, flags, xnet);
@@@ -972,7 -921,7 +972,7 @@@
udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
tos, ttl, df, sport, geneve->dst_port,
!net_eq(geneve->net, dev_net(geneve->dev)),
- !(flags & GENEVE_F_UDP_CSUM));
+ !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX));
return NETDEV_TX_OK;
@@@ -1090,6 -1039,34 +1090,34 @@@ static netdev_tx_t geneve_xmit(struct s
return geneve_xmit_skb(skb, dev, info);
}
+ static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
+ {
+ /* The max_mtu calculation does not take account of GENEVE
+ * options, to avoid excluding potentially valid
+ * configurations.
+ */
+ int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
+ - dev->hard_header_len;
+
+ if (new_mtu < 68)
+ return -EINVAL;
+
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
+ dev->mtu = new_mtu;
+ return 0;
+ }
+
+ static int geneve_change_mtu(struct net_device *dev, int new_mtu)
+ {
+ return __geneve_change_mtu(dev, new_mtu, true);
+ }
+
static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@@ -1134,7 -1111,7 +1162,7 @@@ static const struct net_device_ops gene
.ndo_stop = geneve_stop,
.ndo_start_xmit = geneve_xmit,
.ndo_get_stats64 = ip_tunnel_get_stats64,
- .ndo_change_mtu = eth_change_mtu,
+ .ndo_change_mtu = geneve_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_fill_metadata_dst = geneve_fill_metadata_dst,
@@@ -1201,6 -1178,7 +1229,7 @@@ static void geneve_setup(struct net_dev
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
netif_keep_dst(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
eth_hw_addr_random(dev);
}
@@@ -1323,8 -1301,6 +1352,8 @@@ static int geneve_configure(struct net
return -EPERM;
}
+ dst_cache_reset(&geneve->dst_cache);
+
err = register_netdevice(dev);
if (err)
return err;
@@@ -1383,8 -1359,8 +1412,8 @@@ static int geneve_newlink(struct net *n
metadata = true;
if (data[IFLA_GENEVE_UDP_CSUM] &&
- nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
- flags |= GENEVE_F_UDP_CSUM;
+ !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
+ flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
@@@ -1454,7 -1430,7 +1483,7 @@@ static int geneve_fill_info(struct sk_b
}
if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
- !!(geneve->flags & GENEVE_F_UDP_CSUM)) ||
+ !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
!!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
@@@ -1494,12 -1470,23 +1523,23 @@@ struct net_device *geneve_dev_create_fb
return dev;
err = geneve_configure(net, dev, &geneve_remote_unspec,
- 0, 0, 0, htons(dst_port), true, 0);
- if (err) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
+ 0, 0, 0, htons(dst_port), true,
+ GENEVE_F_UDP_ZERO_CSUM6_RX);
+ if (err)
+ goto err;
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __geneve_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto err;
+
return dev;
+
+ err:
+ free_netdev(dev);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
diff --combined drivers/net/hyperv/netvsc_drv.c
index 202e2b1,98e34fe..2b6595e
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@@ -43,11 -43,6 +43,11 @@@
#define RING_SIZE_MIN 64
#define LINKCHANGE_INT (2 * HZ)
+#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \
+ NETIF_F_SG | \
+ NETIF_F_TSO | \
+ NETIF_F_TSO6 | \
+ NETIF_F_HW_CSUM)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
@@@ -550,8 -545,6 +550,8 @@@ do_send
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
skb, packet, &pb);
+ /* timestamp packet in software */
+ skb_tx_timestamp(skb);
ret = netvsc_send(net_device_ctx->device_ctx, packet,
rndis_msg, &pb, skb);
@@@ -922,7 -915,6 +922,7 @@@ static const struct ethtool_ops ethtool
.get_link = ethtool_op_get_link,
.get_channels = netvsc_get_channels,
.set_channels = netvsc_set_channels,
+ .get_ts_info = ethtool_op_get_ts_info,
};
static const struct net_device_ops device_ops = {
@@@ -1089,12 -1081,17 +1089,15 @@@ static int netvsc_probe(struct hv_devic
net->netdev_ops = &device_ops;
- net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
- net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
- NETIF_F_IP_CSUM | NETIF_F_TSO;
+ net->hw_features = NETVSC_HW_FEATURES;
+ net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
net->ethtool_ops = &ethtool_ops;
SET_NETDEV_DEV(net, &dev->device);
+ /* We always need headroom for rndis header */
+ net->needed_headroom = RNDIS_AND_PPI_SIZE;
+
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
diff --combined drivers/net/phy/bcm7xxx.c
index 9b31104,db507e3..b881a7b1
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@@ -24,7 -24,7 +24,7 @@@
#define MII_BCM7XXX_100TX_FALSE_CAR 0x13
#define MII_BCM7XXX_100TX_DISC 0x14
#define MII_BCM7XXX_AUX_MODE 0x1d
-#define MII_BCM7XX_64CLK_MDIO BIT(12)
+#define MII_BCM7XXX_64CLK_MDIO BIT(12)
#define MII_BCM7XXX_TEST 0x1f
#define MII_BCM7XXX_SHD_MODE_2 BIT(2)
@@@ -247,13 -247,9 +247,9 @@@ static int bcm7xxx_config_init(struct p
int ret;
/* Enable 64 clock MDIO */
- phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO);
+ phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XXX_64CLK_MDIO);
phy_read(phydev, MII_BCM7XXX_AUX_MODE);
- /* Workaround only required for 100Mbits/sec capable PHYs */
- if (phydev->supported & PHY_GBIT_FEATURES)
- return 0;
-
/* set shadow mode 2 */
ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
MII_BCM7XXX_SHD_MODE_2, MII_BCM7XXX_SHD_MODE_2);
@@@ -270,7 -266,7 +266,7 @@@
phy_write(phydev, MII_BCM7XXX_100TX_FALSE_CAR, 0x7555);
/* reset shadow mode 2 */
- ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, 0);
+ ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, MII_BCM7XXX_SHD_MODE_2);
if (ret < 0)
return ret;
@@@ -307,11 -303,6 +303,6 @@@ static int bcm7xxx_suspend(struct phy_d
return 0;
}
- static int bcm7xxx_dummy_config_init(struct phy_device *phydev)
- {
- return 0;
- }
-
#define BCM7XXX_28NM_GPHY(_oui, _name) \
{ \
.phy_id = (_oui), \
@@@ -326,21 -317,6 +317,21 @@@
.resume = bcm7xxx_28nm_resume, \
}
+#define BCM7XXX_40NM_EPHY(_oui, _name) \
+{ \
+ .phy_id = (_oui), \
+ .phy_id_mask = 0xfffffff0, \
+ .name = _name, \
+ .features = PHY_BASIC_FEATURES | \
+ SUPPORTED_Pause | SUPPORTED_Asym_Pause, \
+ .flags = PHY_IS_INTERNAL, \
+ .config_init = bcm7xxx_config_init, \
+ .config_aneg = genphy_config_aneg, \
+ .read_status = genphy_read_status, \
+ .suspend = bcm7xxx_suspend, \
+ .resume = bcm7xxx_config_init, \
+}
+
static struct phy_driver bcm7xxx_driver[] = {
BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"),
@@@ -348,34 -324,43 +339,10 @@@
BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
-{
- .phy_id = PHY_ID_BCM7425,
- .phy_id_mask = 0xfffffff0,
- .name = "Broadcom BCM7425",
- .features = PHY_BASIC_FEATURES |
- SUPPORTED_Pause | SUPPORTED_Asym_Pause,
- .flags = PHY_IS_INTERNAL,
- .config_init = bcm7xxx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
- .suspend = bcm7xxx_suspend,
- .resume = bcm7xxx_config_init,
-}, {
- .phy_id = PHY_ID_BCM7429,
- .phy_id_mask = 0xfffffff0,
- .name = "Broadcom BCM7429",
- .features = PHY_BASIC_FEATURES |
- SUPPORTED_Pause | SUPPORTED_Asym_Pause,
- .flags = PHY_IS_INTERNAL,
- .config_init = bcm7xxx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
- .suspend = bcm7xxx_suspend,
- .resume = bcm7xxx_config_init,
-}, {
- .phy_id = PHY_ID_BCM7435,
- .phy_id_mask = 0xfffffff0,
- .name = "Broadcom BCM7435",
- .features = PHY_BASIC_FEATURES |
- SUPPORTED_Pause | SUPPORTED_Asym_Pause,
- .flags = PHY_IS_INTERNAL,
- .config_init = bcm7xxx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
- .suspend = bcm7xxx_suspend,
- .resume = bcm7xxx_config_init,
-} };
+ BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"),
+ BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"),
+ BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"),
- {
- .phy_id = PHY_BCM_OUI_4,
- .phy_id_mask = 0xffff0000,
- .name = "Broadcom BCM7XXX 40nm",
- .features = PHY_GBIT_FEATURES |
- SUPPORTED_Pause | SUPPORTED_Asym_Pause,
- .flags = PHY_IS_INTERNAL,
- .config_init = bcm7xxx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
- .suspend = bcm7xxx_suspend,
- .resume = bcm7xxx_config_init,
- }, {
- .phy_id = PHY_BCM_OUI_5,
- .phy_id_mask = 0xffffff00,
- .name = "Broadcom BCM7XXX 65nm",
- .features = PHY_BASIC_FEATURES |
- SUPPORTED_Pause | SUPPORTED_Asym_Pause,
- .flags = PHY_IS_INTERNAL,
- .config_init = bcm7xxx_dummy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
- .suspend = bcm7xxx_suspend,
- .resume = bcm7xxx_config_init,
- } };
++};
static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
{ PHY_ID_BCM7250, 0xfffffff0, },
@@@ -386,8 -371,6 +353,6 @@@
{ PHY_ID_BCM7439, 0xfffffff0, },
{ PHY_ID_BCM7435, 0xfffffff0, },
{ PHY_ID_BCM7445, 0xfffffff0, },
- { PHY_BCM_OUI_4, 0xffff0000 },
- { PHY_BCM_OUI_5, 0xffffff00 },
{ }
};
diff --combined drivers/net/phy/marvell.c
index d0168f1,ab1d0fc..9fb9d80
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@@ -133,11 -133,6 +133,11 @@@
#define MII_88E3016_DISABLE_SCRAMBLER 0x0200
#define MII_88E3016_AUTO_MDIX_CROSSOVER 0x0030
+#define MII_88E1510_GEN_CTRL_REG_1 0x14
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK 0x7
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */
+#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
+
MODULE_DESCRIPTION("Marvell PHY driver");
MODULE_AUTHOR("Andy Fleming");
MODULE_LICENSE("GPL");
@@@ -443,41 -438,6 +443,41 @@@ static int m88e1318_config_aneg(struct
return m88e1121_config_aneg(phydev);
}
+static int m88e1510_config_init(struct phy_device *phydev)
+{
+ int err;
+ int temp;
+
+ /* SGMII-to-Copper mode initialization */
+ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+ /* Select page 18 */
+ err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 18);
+ if (err < 0)
+ return err;
+
+ /* In reg 20, write MODE[2:0] = 0x1 (SGMII to Copper) */
+ temp = phy_read(phydev, MII_88E1510_GEN_CTRL_REG_1);
+ temp &= ~MII_88E1510_GEN_CTRL_REG_1_MODE_MASK;
+ temp |= MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII;
+ err = phy_write(phydev, MII_88E1510_GEN_CTRL_REG_1, temp);
+ if (err < 0)
+ return err;
+
+ /* PHY reset is necessary after changing MODE[2:0] */
+ temp |= MII_88E1510_GEN_CTRL_REG_1_RESET;
+ err = phy_write(phydev, MII_88E1510_GEN_CTRL_REG_1, temp);
+ if (err < 0)
+ return err;
+
+ /* Reset page selection */
+ err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
static int m88e1510_config_aneg(struct phy_device *phydev)
{
int err;
@@@ -486,6 -446,12 +486,12 @@@
if (err < 0)
return err;
+ return 0;
+ }
+
+ static int marvell_config_init(struct phy_device *phydev)
+ {
+ /* Set registers from marvell,reg-init DT property */
return marvell_of_reg_init(phydev);
}
@@@ -535,7 -501,7 +541,7 @@@ static int m88e1116r_config_init(struc
mdelay(500);
- return 0;
+ return marvell_config_init(phydev);
}
static int m88e3016_config_init(struct phy_device *phydev)
@@@ -554,7 -520,7 +560,7 @@@
if (reg < 0)
return reg;
- return 0;
+ return marvell_config_init(phydev);
}
static int m88e1111_config_init(struct phy_device *phydev)
@@@ -1065,8 -1031,8 +1071,8 @@@ static u64 marvell_get_stat(struct phy_
{
struct marvell_hw_stat stat = marvell_hw_stats[i];
struct marvell_priv *priv = phydev->priv;
- int err, oldpage;
- u64 val;
+ int err, oldpage, val;
+ u64 ret;
oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
err = phy_write(phydev, MII_MARVELL_PHY_PAGE,
@@@ -1076,16 -1042,16 +1082,16 @@@
val = phy_read(phydev, stat.reg);
if (val < 0) {
- val = UINT64_MAX;
+ ret = UINT64_MAX;
} else {
val = val & ((1 << stat.bits) - 1);
priv->stats[i] += val;
- val = priv->stats[i];
+ ret = priv->stats[i];
}
phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
- return val;
+ return ret;
}
static void marvell_get_stats(struct phy_device *phydev,
@@@ -1118,6 -1084,7 +1124,7 @@@ static struct phy_driver marvell_driver
.features = PHY_GBIT_FEATURES,
.probe = marvell_probe,
.flags = PHY_HAS_INTERRUPT,
+ .config_init = &marvell_config_init,
.config_aneg = &marvell_config_aneg,
.read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
@@@ -1189,6 -1156,7 +1196,7 @@@
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
+ .config_init = &marvell_config_init,
.config_aneg = &m88e1121_config_aneg,
.read_status = &marvell_read_status,
.ack_interrupt = &marvell_ack_interrupt,
@@@ -1207,6 -1175,7 +1215,7 @@@
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
+ .config_init = &marvell_config_init,
.config_aneg = &m88e1318_config_aneg,
.read_status = &marvell_read_status,
.ack_interrupt = &marvell_ack_interrupt,
@@@ -1299,7 -1268,7 +1308,7 @@@
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
- .config_init = &marvell_config_init,
+ .config_init = &m88e1510_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
.ack_interrupt = &marvell_ack_interrupt,
@@@ -1318,6 -1287,7 +1327,7 @@@
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
+ .config_init = &marvell_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
.ack_interrupt = &marvell_ack_interrupt,
diff --combined drivers/net/vxlan.c
index 2ddc642,e6944b2..909f793
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -42,7 -42,7 +42,7 @@@
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
-#include <net/udp_tunnel.h>
+
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@@ -197,9 -197,9 +197,9 @@@ static int vxlan_nla_put_addr(struct sk
#endif
/* Virtual Network hash table head */
-static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
+static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
{
- return &vs->vni_list[hash_32(id, VNI_HASH_BITS)];
+ return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
}
/* Socket hash table head */
@@@ -242,16 -242,12 +242,16 @@@ static struct vxlan_sock *vxlan_find_so
return NULL;
}
-static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni)
{
struct vxlan_dev *vxlan;
- hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
- if (vxlan->default_dst.remote_vni == id)
+ /* For flow based devices, map all packets to VNI 0 */
+ if (vs->flags & VXLAN_F_COLLECT_METADATA)
+ vni = 0;
+
+ hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
+ if (vxlan->default_dst.remote_vni == vni)
return vxlan;
}
@@@ -259,7 -255,7 +259,7 @@@
}
/* Look up VNI in a per net namespace table */
-static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
+static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,
sa_family_t family, __be16 port,
u32 flags)
{
@@@ -269,7 -265,7 +269,7 @@@
if (!vs)
return NULL;
- return vxlan_vs_find_vni(vs, id);
+ return vxlan_vs_find_vni(vs, vni);
}
/* Fill in neighbour message in skbuff. */
@@@ -319,7 -315,7 +319,7 @@@ static int vxlan_fdb_info(struct sk_buf
nla_put_be16(skb, NDA_PORT, rdst->remote_port))
goto nla_put_failure;
if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
- nla_put_u32(skb, NDA_VNI, rdst->remote_vni))
+ nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
goto nla_put_failure;
if (rdst->remote_ifindex &&
nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
@@@ -387,7 -383,7 +387,7 @@@ static void vxlan_ip_miss(struct net_de
};
struct vxlan_rdst remote = {
.remote_ip = *ipa, /* goes to NDA_DST */
- .remote_vni = VXLAN_N_VID,
+ .remote_vni = cpu_to_be32(VXLAN_N_VID),
};
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
@@@ -456,7 -452,7 +456,7 @@@ static struct vxlan_fdb *vxlan_find_mac
/* caller should hold vxlan->hash_lock */
static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port,
- __u32 vni, __u32 ifindex)
+ __be32 vni, __u32 ifindex)
{
struct vxlan_rdst *rd;
@@@ -473,8 -469,7 +473,8 @@@
/* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f,
- union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex)
+ union vxlan_addr *ip, __be16 port, __be32 vni,
+ __u32 ifindex)
{
struct vxlan_rdst *rd;
@@@ -485,8 -480,6 +485,8 @@@
rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
if (!rd)
return 0;
+
+ dst_cache_reset(&rd->dst_cache);
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
@@@ -496,7 -489,7 +496,7 @@@
/* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f,
- union vxlan_addr *ip, __be16 port, __u32 vni,
+ union vxlan_addr *ip, __be16 port, __be32 vni,
__u32 ifindex, struct vxlan_rdst **rdp)
{
struct vxlan_rdst *rd;
@@@ -508,12 -501,6 +508,12 @@@
rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
if (rd == NULL)
return -ENOBUFS;
+
+ if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
+ kfree(rd);
+ return -ENOBUFS;
+ }
+
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
@@@ -528,8 -515,7 +528,8 @@@
static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
unsigned int off,
struct vxlanhdr *vh, size_t hdrlen,
- u32 data, struct gro_remcsum *grc,
+ __be32 vni_field,
+ struct gro_remcsum *grc,
bool nopartial)
{
size_t start, offset;
@@@ -540,8 -526,10 +540,8 @@@
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
- start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
- offset = start + ((data & VXLAN_RCO_UDP) ?
- offsetof(struct udphdr, check) :
- offsetof(struct tcphdr, check));
+ start = vxlan_rco_start(vni_field);
+ offset = start + vxlan_rco_offset(vni_field);
vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
start, offset, grc, nopartial);
@@@ -561,7 -549,7 +561,7 @@@ static struct sk_buff **vxlan_gro_recei
int flush = 1;
struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
udp_offloads);
- u32 flags;
+ __be32 flags;
struct gro_remcsum grc;
skb_gro_remcsum_init(&grc);
@@@ -577,11 -565,11 +577,11 @@@
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
- flags = ntohl(vh->vx_flags);
+ flags = vh->vx_flags;
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
- ntohl(vh->vx_vni), &grc,
+ vh->vx_vni, &grc,
!!(vs->flags &
VXLAN_F_REMCSUM_NOPARTIAL));
@@@ -672,7 -660,7 +672,7 @@@ static void vxlan_notify_del_rx_port(st
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
- __be16 port, __u32 vni, __u32 ifindex,
+ __be16 port, __be32 vni, __u32 ifindex,
__u8 ndm_flags)
{
struct vxlan_rdst *rd = NULL;
@@@ -761,10 -749,8 +761,10 @@@ static void vxlan_fdb_free(struct rcu_h
struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
struct vxlan_rdst *rd, *nd;
- list_for_each_entry_safe(rd, nd, &f->remotes, list)
+ list_for_each_entry_safe(rd, nd, &f->remotes, list) {
+ dst_cache_destroy(&rd->dst_cache);
kfree(rd);
+ }
kfree(f);
}
@@@ -781,8 -767,7 +781,8 @@@ static void vxlan_fdb_destroy(struct vx
}
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
- union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex)
+ union vxlan_addr *ip, __be16 *port, __be32 *vni,
+ u32 *ifindex)
{
struct net *net = dev_net(vxlan->dev);
int err;
@@@ -815,7 -800,7 +815,7 @@@
if (tb[NDA_VNI]) {
if (nla_len(tb[NDA_VNI]) != sizeof(u32))
return -EINVAL;
- *vni = nla_get_u32(tb[NDA_VNI]);
+ *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
} else {
*vni = vxlan->default_dst.remote_vni;
}
@@@ -845,8 -830,7 +845,8 @@@ static int vxlan_fdb_add(struct ndmsg *
/* struct net *net = dev_net(vxlan->dev); */
union vxlan_addr ip;
__be16 port;
- u32 vni, ifindex;
+ __be32 vni;
+ u32 ifindex;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@@ -883,8 -867,7 +883,8 @@@ static int vxlan_fdb_delete(struct ndms
struct vxlan_rdst *rd = NULL;
union vxlan_addr ip;
__be16 port;
- u32 vni, ifindex;
+ __be32 vni;
+ u32 ifindex;
int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
@@@ -1139,65 -1122,54 +1139,65 @@@ static int vxlan_igmp_leave(struct vxla
return ret;
}
-static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
- size_t hdrlen, u32 data, bool nopartial)
+static bool vxlan_remcsum(struct vxlanhdr *unparsed,
+ struct sk_buff *skb, u32 vxflags)
{
size_t start, offset, plen;
- if (skb->remcsum_offload)
- return vh;
+ if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
+ goto out;
- start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
- offset = start + ((data & VXLAN_RCO_UDP) ?
- offsetof(struct udphdr, check) :
- offsetof(struct tcphdr, check));
+ start = vxlan_rco_start(unparsed->vx_vni);
+ offset = start + vxlan_rco_offset(unparsed->vx_vni);
- plen = hdrlen + offset + sizeof(u16);
+ plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
if (!pskb_may_pull(skb, plen))
- return NULL;
+ return false;
- vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+ skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
+ !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
+out:
+ unparsed->vx_flags &= ~VXLAN_HF_RCO;
+ unparsed->vx_vni &= VXLAN_VNI_MASK;
+ return true;
+}
- skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset,
- nopartial);
+static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
+ struct vxlan_metadata *md,
+ struct metadata_dst *tun_dst)
+{
+ struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
- return vh;
+ if (!(unparsed->vx_flags & VXLAN_HF_GBP))
+ goto out;
+
+ md->gbp = ntohs(gbp->policy_id);
+
+ if (tun_dst)
+ tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+
+ if (gbp->dont_learn)
+ md->gbp |= VXLAN_GBP_DONT_LEARN;
+
+ if (gbp->policy_applied)
+ md->gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+out:
+ unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
- struct vxlan_metadata *md, u32 vni,
+static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs,
+ struct sk_buff *skb, struct vxlan_metadata *md,
struct metadata_dst *tun_dst)
{
struct iphdr *oip = NULL;
struct ipv6hdr *oip6 = NULL;
- struct vxlan_dev *vxlan;
struct pcpu_sw_netstats *stats;
union vxlan_addr saddr;
int err = 0;
- /* For flow based devices, map all packets to VNI 0 */
- if (vs->flags & VXLAN_F_COLLECT_METADATA)
- vni = 0;
-
- /* Is this VNI defined? */
- vxlan = vxlan_vs_find_vni(vs, vni);
- if (!vxlan)
- goto drop;
-
skb_reset_mac_header(skb);
- skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
skb->protocol = eth_type_trans(skb, vxlan->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@@ -1274,45 -1246,48 +1274,45 @@@ drop
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct metadata_dst *tun_dst = NULL;
+ struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
- struct vxlanhdr *vxh;
- u32 flags, vni;
+ struct vxlanhdr unparsed;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
- goto error;
-
- vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
- flags = ntohl(vxh->vx_flags);
- vni = ntohl(vxh->vx_vni);
+ return 1;
- if (flags & VXLAN_HF_VNI) {
- flags &= ~VXLAN_HF_VNI;
- } else {
- /* VNI flag always required to be set */
- goto bad_flags;
+ unparsed = *vxlan_hdr(skb);
+ /* VNI flag always required to be set */
+ if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
+ netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
+ ntohl(vxlan_hdr(skb)->vx_flags),
+ ntohl(vxlan_hdr(skb)->vx_vni));
+ /* Return non vxlan pkt */
+ return 1;
}
-
- if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
- goto drop;
- vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+ unparsed.vx_flags &= ~VXLAN_HF_VNI;
+ unparsed.vx_vni &= ~VXLAN_VNI_MASK;
vs = rcu_dereference_sk_user_data(sk);
if (!vs)
goto drop;
- if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
- vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
- !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
- if (!vxh)
- goto drop;
+ vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni));
+ if (!vxlan)
+ goto drop;
- flags &= ~VXLAN_HF_RCO;
- vni &= VXLAN_VNI_MASK;
- }
+ if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
+ !net_eq(vxlan->net, dev_net(vxlan->dev))))
+ goto drop;
if (vxlan_collect_metadata(vs)) {
+ __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
- cpu_to_be64(vni >> 8), sizeof(*md));
+ vxlan_vni_to_tun_id(vni), sizeof(*md));
if (!tun_dst)
goto drop;
@@@ -1325,13 -1300,25 +1325,13 @@@
/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
- if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
- struct vxlanhdr_gbp *gbp;
-
- gbp = (struct vxlanhdr_gbp *)vxh;
- md->gbp = ntohs(gbp->policy_id);
-
- if (tun_dst)
- tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
-
- if (gbp->dont_learn)
- md->gbp |= VXLAN_GBP_DONT_LEARN;
-
- if (gbp->policy_applied)
- md->gbp |= VXLAN_GBP_POLICY_APPLIED;
-
- flags &= ~VXLAN_GBP_USED_BITS;
- }
+ if (vs->flags & VXLAN_F_REMCSUM_RX)
+ if (!vxlan_remcsum(&unparsed, skb, vs->flags))
+ goto drop;
+ if (vs->flags & VXLAN_F_GBP)
+ vxlan_parse_gbp_hdr(&unparsed, md, tun_dst);
- if (flags || vni & ~VXLAN_VNI_MASK) {
+ if (unparsed.vx_flags || unparsed.vx_vni) {
/* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from
* VXLAN RFC (RFC7348) which stipulates that bits in reserved
@@@ -1340,19 -1327,28 +1340,19 @@@
* is more robust and provides a little more security in
* adding extensions to VXLAN.
*/
-
- goto bad_flags;
+ goto drop;
}
- vxlan_rcv(vs, skb, md, vni >> 8, tun_dst);
+ vxlan_rcv(vxlan, vs, skb, md, tun_dst);
return 0;
drop:
- /* Consume bad packet */
- kfree_skb(skb);
- return 0;
-
-bad_flags:
- netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
- ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
-
-error:
if (tun_dst)
dst_release((struct dst_entry *)tun_dst);
- /* Return non vxlan pkt */
- return 1;
+ /* Consume bad packet */
+ kfree_skb(skb);
+ return 0;
}
static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
@@@ -1677,7 -1673,7 +1677,7 @@@ static void vxlan_build_gbp_hdr(struct
return;
gbp = (struct vxlanhdr_gbp *)vxh;
- vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+ vxh->vx_flags |= VXLAN_HF_GBP;
if (md->gbp & VXLAN_GBP_DONT_LEARN)
gbp->dont_learn = 1;
@@@ -1688,15 -1684,20 +1688,15 @@@
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}
-#if IS_ENABLED(CONFIG_IPV6)
-static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr, __u8 prio, __u8 ttl,
- __be16 src_port, __be16 dst_port, __be32 vni,
- struct vxlan_metadata *md, bool xnet, u32 vxflags)
+static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
+ int iphdr_len, __be32 vni,
+ struct vxlan_metadata *md, u32 vxflags,
+ bool udp_sum)
{
struct vxlanhdr *vxh;
int min_headroom;
int err;
- bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- u16 hdrlen = sizeof(struct vxlanhdr);
if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
@@@ -1705,39 -1706,50 +1705,39 @@@
if (csum_start <= VXLAN_MAX_REMCSUM_START &&
!(csum_start & VXLAN_RCO_SHIFT_MASK) &&
(skb->csum_offset == offsetof(struct udphdr, check) ||
- skb->csum_offset == offsetof(struct tcphdr, check))) {
- udp_sum = false;
+ skb->csum_offset == offsetof(struct tcphdr, check)))
type |= SKB_GSO_TUNNEL_REMCSUM;
- }
}
- skb_scrub_packet(skb, xnet);
-
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
- + VXLAN_HLEN + sizeof(struct ipv6hdr)
+ + VXLAN_HLEN + iphdr_len
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
if (unlikely(err)) {
kfree_skb(skb);
- goto err;
+ return err;
}
skb = vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb)) {
- err = -ENOMEM;
- goto err;
- }
+ if (WARN_ON(!skb))
+ return -ENOMEM;
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb)) {
- err = -EINVAL;
- goto err;
- }
+ skb = iptunnel_handle_offloads(skb, type);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
- vxh->vx_flags = htonl(VXLAN_HF_VNI);
- vxh->vx_vni = vni;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vni);
if (type & SKB_GSO_TUNNEL_REMCSUM) {
- u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
- VXLAN_RCO_SHIFT;
+ unsigned int start;
- if (skb->csum_offset == offsetof(struct udphdr, check))
- data |= VXLAN_RCO_UDP;
-
- vxh->vx_vni |= htonl(data);
- vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+ start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
+ vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
+ vxh->vx_flags |= VXLAN_HF_RCO;
if (!skb_is_gso(skb)) {
skb->ip_summed = CHECKSUM_NONE;
@@@ -1749,63 -1761,102 +1749,63 @@@
vxlan_build_gbp_hdr(vxh, vxflags, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
-
- udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
- ttl, src_port, dst_port,
- !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
return 0;
-err:
- dst_release(dst);
- return err;
}
-#endif
-static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni,
- struct vxlan_metadata *md, bool xnet, u32 vxflags)
+static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
+ struct sk_buff *skb, int oif, u8 tos,
+ __be32 daddr, __be32 *saddr,
+ struct dst_cache *dst_cache,
+ struct ip_tunnel_info *info)
{
- struct vxlanhdr *vxh;
- int min_headroom;
- int err;
- bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
- int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- u16 hdrlen = sizeof(struct vxlanhdr);
-
- if ((vxflags & VXLAN_F_REMCSUM_TX) &&
- skb->ip_summed == CHECKSUM_PARTIAL) {
- int csum_start = skb_checksum_start_offset(skb);
-
- if (csum_start <= VXLAN_MAX_REMCSUM_START &&
- !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
- (skb->csum_offset == offsetof(struct udphdr, check) ||
- skb->csum_offset == offsetof(struct tcphdr, check))) {
- udp_sum = false;
- type |= SKB_GSO_TUNNEL_REMCSUM;
- }
- }
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + VXLAN_HLEN + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+ struct rtable *rt = NULL;
+ bool use_cache = false;
+ struct flowi4 fl4;
- /* Need space for new headers (invalidates iph ptr) */
- err = skb_cow_head(skb, min_headroom);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
+ /* when the ip_tunnel_info is available, the tos used for lookup is
+ * packet independent, so we can use the cache
+ */
+ if (!skb->mark && (!tos || info)) {
+ use_cache = true;
+ rt = dst_cache_get_ip4(dst_cache, saddr);
+ if (rt)
+ return rt;
}
- skb = vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb))
- return -ENOMEM;
-
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
- vxh->vx_flags = htonl(VXLAN_HF_VNI);
- vxh->vx_vni = vni;
-
- if (type & SKB_GSO_TUNNEL_REMCSUM) {
- u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
- VXLAN_RCO_SHIFT;
-
- if (skb->csum_offset == offsetof(struct udphdr, check))
- data |= VXLAN_RCO_UDP;
-
- vxh->vx_vni |= htonl(data);
- vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_oif = oif;
+ fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.daddr = daddr;
+ fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
- if (!skb_is_gso(skb)) {
- skb->ip_summed = CHECKSUM_NONE;
- skb->encapsulation = 0;
- }
+ rt = ip_route_output_key(vxlan->net, &fl4);
+ if (!IS_ERR(rt)) {
+ *saddr = fl4.saddr;
+ if (use_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
}
-
- if (vxflags & VXLAN_F_GBP)
- vxlan_build_gbp_hdr(vxh, vxflags, md);
-
- skb_set_inner_protocol(skb, htons(ETH_P_TEB));
-
- udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df,
- src_port, dst_port, xnet,
- !(vxflags & VXLAN_F_UDP_CSUM));
- return 0;
+ return rt;
}
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif,
const struct in6_addr *daddr,
- struct in6_addr *saddr)
+ struct in6_addr *saddr,
+ struct dst_cache *dst_cache)
{
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
+ if (!skb->mark) {
+ ndst = dst_cache_get_ip6(dst_cache, saddr);
+ if (ndst)
+ return ndst;
+ }
+
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.daddr = *daddr;
@@@ -1820,8 -1871,6 +1820,8 @@@
return ERR_PTR(err);
*saddr = fl6.saddr;
+ if (!skb->mark)
+ dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
#endif
@@@ -1874,24 -1923,22 +1874,24 @@@ static void vxlan_encap_bypass(struct s
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
+ struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk;
struct rtable *rt = NULL;
const struct iphdr *old_iph;
- struct flowi4 fl4;
union vxlan_addr *dst;
union vxlan_addr remote_ip;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
- u32 vni;
+ __be32 vni;
__be16 df = 0;
__u8 tos, ttl;
int err;
u32 flags = vxlan->flags;
+ bool udp_sum = false;
+ bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
info = skb_tunnel_info(skb);
@@@ -1899,7 -1946,6 +1899,7 @@@
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
dst = &rdst->remote_ip;
+ dst_cache = &rdst->dst_cache;
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@@ -1907,14 -1953,13 +1907,14 @@@
goto drop;
}
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- vni = be64_to_cpu(info->key.tun_id);
+ vni = vxlan_tun_id_to_vni(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET)
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
dst = &remote_ip;
+ dst_cache = &info->dst_cache;
}
if (vxlan_addr_any(dst)) {
@@@ -1942,7 -1987,6 +1942,7 @@@
if (info) {
ttl = info->key.ttl;
tos = info->key.tos;
+ udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
if (info->options_len)
md = ip_tunnel_info_opts(info);
@@@ -1951,16 -1995,29 +1951,16 @@@
}
if (dst->sa.sa_family == AF_INET) {
+ __be32 saddr;
+
if (!vxlan->vn4_sock)
goto drop;
sk = vxlan->vn4_sock->sock->sk;
- if (info) {
- if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
- df = htons(IP_DF);
-
- if (info->key.tun_flags & TUNNEL_CSUM)
- flags |= VXLAN_F_UDP_CSUM;
- else
- flags &= ~VXLAN_F_UDP_CSUM;
- }
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
- fl4.flowi4_tos = RT_TOS(tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = dst->sin.sin_addr.s_addr;
- fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
+ rt = vxlan_get_route(vxlan, skb,
+ rdst ? rdst->remote_ifindex : 0, tos,
+ dst->sin.sin_addr.s_addr, &saddr,
+ dst_cache, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr);
@@@ -1990,21 -2047,18 +1990,21 @@@
return;
}
+ if (!info)
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+ else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ df = htons(IP_DF);
+
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
- dst->sin.sin_addr.s_addr, tos, ttl, df,
- src_port, dst_port, htonl(vni << 8), md,
- !net_eq(vxlan->net, dev_net(vxlan->dev)),
- flags);
- if (err < 0) {
- /* skb is already freed. */
- skb = NULL;
- goto rt_tx_error;
- }
+ err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
+ vni, md, flags, udp_sum);
+ if (err < 0)
+ goto xmit_tx_error;
+
+ udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ dst->sin.sin_addr.s_addr, tos, ttl, df,
+ src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
@@@ -2017,8 -2071,7 +2017,8 @@@
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0,
- &dst->sin6.sin6_addr, &saddr);
+ &dst->sin6.sin6_addr, &saddr,
+ dst_cache);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
@@@ -2050,20 -2103,18 +2050,20 @@@
return;
}
- if (info) {
- if (info->key.tun_flags & TUNNEL_CSUM)
- flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
- else
- flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
- }
+ if (!info)
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
- err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
- 0, ttl, src_port, dst_port, htonl(vni << 8), md,
- !net_eq(vxlan->net, dev_net(vxlan->dev)),
- flags);
+ skb_scrub_packet(skb, xnet);
+ err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
+ vni, md, flags, udp_sum);
+ if (err < 0) {
+ dst_release(ndst);
+ return;
+ }
+ udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
+ &saddr, &dst->sin6.sin6_addr,
+ 0, ttl, src_port, dst_port, !udp_sum);
#endif
}
@@@ -2073,9 -2124,6 +2073,9 @@@ drop
dev->stats.tx_dropped++;
goto tx_free;
+xmit_tx_error:
+ /* skb is already freed. */
+ skb = NULL;
rt_tx_error:
ip_rt_put(rt);
tx_error:
@@@ -2123,9 -2171,11 +2123,11 @@@ static netdev_tx_t vxlan_xmit(struct sk
#endif
}
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA &&
- info && info->mode & IP_TUNNEL_INFO_TX) {
- vxlan_xmit_one(skb, dev, NULL, false);
+ if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+ if (info && info->mode & IP_TUNNEL_INFO_TX)
+ vxlan_xmit_one(skb, dev, NULL, false);
+ else
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
@@@ -2213,7 -2263,7 +2215,7 @@@ static void vxlan_cleanup(unsigned lon
static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- __u32 vni = vxlan->default_dst.remote_vni;
+ __be32 vni = vxlan->default_dst.remote_vni;
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
@@@ -2319,29 -2369,68 +2321,43 @@@ static void vxlan_set_multicast_list(st
{
}
- static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
+ static int __vxlan_change_mtu(struct net_device *dev,
+ struct net_device *lowerdev,
+ struct vxlan_rdst *dst, int new_mtu, bool strict)
{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_rdst *dst = &vxlan->default_dst;
- struct net_device *lowerdev;
- int max_mtu;
+ int max_mtu = IP_MAX_MTU;
- lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
- if (lowerdev == NULL)
- return eth_change_mtu(dev, new_mtu);
+ if (lowerdev)
+ max_mtu = lowerdev->mtu;
if (dst->remote_ip.sa.sa_family == AF_INET6)
- max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
+ max_mtu -= VXLAN6_HEADROOM;
else
- max_mtu = lowerdev->mtu - VXLAN_HEADROOM;
+ max_mtu -= VXLAN_HEADROOM;
- if (new_mtu < 68 || new_mtu > max_mtu)
+ if (new_mtu < 68)
return -EINVAL;
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
dev->mtu = new_mtu;
return 0;
}
+ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
+ {
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+ dst->remote_ifindex);
+ return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
+ }
+
-static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
- struct ip_tunnel_info *info,
- __be16 sport, __be16 dport)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct rtable *rt;
- struct flowi4 fl4;
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_tos = RT_TOS(info->key.tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = info->key.u.ipv4.dst;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- ip_rt_put(rt);
-
- info->key.u.ipv4.src = fl4.saddr;
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
- return 0;
-}
-
static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@@ -2353,16 -2442,9 +2369,16 @@@
dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
if (ip_tunnel_info_af(info) == AF_INET) {
+ struct rtable *rt;
+
if (!vxlan->vn4_sock)
return -EINVAL;
- return egress_ipv4_tun_info(dev, skb, info, sport, dport);
+ rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+ info->key.u.ipv4.dst,
+ &info->key.u.ipv4.src, NULL, info);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+ ip_rt_put(rt);
} else {
#if IS_ENABLED(CONFIG_IPV6)
struct dst_entry *ndst;
@@@ -2371,16 -2453,17 +2387,16 @@@
return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0,
&info->key.u.ipv6.dst,
- &info->key.u.ipv6.src);
+ &info->key.u.ipv6.src, NULL);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
-
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
#else /* !CONFIG_IPV6 */
return -EPFNOSUPPORT;
#endif
}
+ info->key.tp_src = sport;
+ info->key.tp_dst = dport;
return 0;
}
@@@ -2456,6 -2539,7 +2472,7 @@@ static void vxlan_setup(struct net_devi
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netif_keep_dst(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
INIT_LIST_HEAD(&vxlan->next);
@@@ -2698,6 -2782,7 +2715,7 @@@ static int vxlan_dev_configure(struct n
int err;
bool use_ipv6 = false;
__be16 default_port = vxlan->cfg.dst_port;
+ struct net_device *lowerdev = NULL;
vxlan->net = src_net;
@@@ -2718,9 -2803,7 +2736,7 @@@
}
if (conf->remote_ifindex) {
- struct net_device *lowerdev
- = __dev_get_by_index(src_net, conf->remote_ifindex);
-
+ lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
dst->remote_ifindex = conf->remote_ifindex;
if (!lowerdev) {
@@@ -2744,6 -2827,12 +2760,12 @@@
needed_headroom = lowerdev->hard_header_len;
}
+ if (conf->mtu) {
+ err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
+ if (err)
+ return err;
+ }
+
if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
needed_headroom += VXLAN6_HEADROOM;
else
@@@ -2828,7 -2917,7 +2850,7 @@@ static int vxlan_newlink(struct net *sr
memset(&conf, 0, sizeof(conf));
if (data[IFLA_VXLAN_ID])
- conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]);
+ conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
if (data[IFLA_VXLAN_GROUP]) {
conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
@@@ -2896,9 -2985,8 +2918,9 @@@
if (data[IFLA_VXLAN_PORT])
conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
- if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
- conf.flags |= VXLAN_F_UDP_CSUM;
+ if (data[IFLA_VXLAN_UDP_CSUM] &&
+ !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+ conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
@@@ -2933,7 -3021,7 +2955,7 @@@
break;
case -EEXIST:
- pr_info("duplicate VNI %u\n", conf.vni);
+ pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
break;
}
@@@ -2991,7 -3079,7 +3013,7 @@@ static int vxlan_fill_info(struct sk_bu
.high = htons(vxlan->cfg.port_max),
};
- if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni))
+ if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
goto nla_put_failure;
if (!vxlan_addr_any(&dst->remote_ip)) {
@@@ -3042,7 -3130,7 +3064,7 @@@
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
- !!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+ !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
diff --combined drivers/net/wireless/intel/iwlwifi/Kconfig
index 11932d5,7438fbe..16c4f38
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@@ -53,7 -53,6 +53,6 @@@ config IWLWIFI_LED
config IWLDVM
tristate "Intel Wireless WiFi DVM Firmware support"
- depends on m
help
This is the driver that supports the DVM firmware. The list
of the devices that use this firmware is available here:
@@@ -99,18 -98,6 +98,18 @@@ config IWLWIFI_UAPS
If unsure, say N.
+config IWLWIFI_PCIE_RTPM
+ bool "Enable runtime power management mode for PCIe devices"
+ depends on IWLMVM && PM
+ default n
+ help
+ Say Y here to enable runtime power management for PCIe
+ devices. If enabled, the device will go into low power mode
+ when idle for a short period of time, allowing for improved
+ power saving during runtime.
+
+ If unsure, say N.
+
menu "Debugging Options"
config IWLWIFI_DEBUG
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 1e1ab9d,ea1e177..aa6d807
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@@ -930,11 -930,8 +930,11 @@@ int iwl_mvm_config_scan(struct iwl_mvm
if (WARN_ON(num_channels > mvm->fw->ucode_capa.n_scan_channels))
return -ENOBUFS;
- if (type == mvm->scan_type)
+ if (type == mvm->scan_type) {
+ IWL_DEBUG_SCAN(mvm,
+ "Ignoring UMAC scan config of the same type\n");
return 0;
+ }
cmd_size = sizeof(*scan_config) + mvm->fw->ucode_capa.n_scan_channels;
@@@ -1112,7 -1109,7 +1112,7 @@@ static int iwl_mvm_scan_umac(struct iwl
cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params,
vif));
- if (type == IWL_MVM_SCAN_SCHED)
+ if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT)
cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE);
if (iwl_mvm_scan_use_ebs(mvm, vif))
@@@ -1301,6 -1298,10 +1301,10 @@@ int iwl_mvm_sched_scan_start(struct iwl
return -EBUSY;
}
+ /* we don't support "match all" in the firmware */
+ if (!req->n_match_sets)
+ return -EOPNOTSUPP;
+
ret = iwl_mvm_check_running_scans(mvm, type);
if (ret)
return ret;
@@@ -1354,7 -1355,7 +1358,7 @@@
if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) {
hcmd.id = iwl_cmd_id(SCAN_REQ_UMAC, IWL_ALWAYS_LONG_GROUP, 0);
- ret = iwl_mvm_scan_umac(mvm, vif, &params, IWL_MVM_SCAN_SCHED);
+ ret = iwl_mvm_scan_umac(mvm, vif, &params, type);
} else {
hcmd.id = SCAN_OFFLOAD_REQUEST_CMD;
ret = iwl_mvm_scan_lmac(mvm, vif, ¶ms);
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 2f95916,73c9559..542bbc5
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@@ -2,7 -2,6 +2,7 @@@
*
* Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
*
* Portions of this file are derived from the ipw3945 project, as well
* as portions of the ieee80211 subsystem header files.
@@@ -57,23 -56,17 +57,23 @@@
#define RX_NUM_QUEUES 1
#define RX_POST_REQ_ALLOC 2
#define RX_CLAIM_REQ_ALLOC 8
-#define RX_POOL_SIZE ((RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC) * RX_NUM_QUEUES)
-#define RX_LOW_WATERMARK 8
+#define RX_PENDING_WATERMARK 16
struct iwl_host_cmd;
/*This file includes the declaration that are internal to the
* trans_pcie layer */
+/**
+ * struct iwl_rx_mem_buffer
+ * @page_dma: bus address of rxb page
+ * @page: driver's pointer to the rxb page
+ * @vid: index of this rxb in the global table
+ */
struct iwl_rx_mem_buffer {
dma_addr_t page_dma;
struct page *page;
+ u16 vid;
struct list_head list;
};
@@@ -97,12 -90,8 +97,12 @@@ struct isr_statistics
/**
* struct iwl_rxq - Rx queue
- * @bd: driver's pointer to buffer of receive buffer descriptors (rbd)
+ * @id: queue index
+ * @bd: driver's pointer to buffer of receive buffer descriptors (rbd).
+ * Address size is 32 bit in pre-9000 devices and 64 bit in 9000 devices.
* @bd_dma: bus address of buffer of receive buffer descriptors (rbd)
+ * @used_bd: driver's pointer to buffer of used receive buffer descriptors (rbd)
+ * @used_bd_dma: physical address of buffer of used receive buffer descriptors (rbd)
* @read: Shared index to newest available Rx buffer
* @write: Shared index to oldest written Rx packet
* @free_count: Number of pre-allocated buffers in rx_free
@@@ -114,34 -103,32 +114,34 @@@
* @rb_stts: driver's pointer to receive buffer status
* @rb_stts_dma: bus address of receive buffer status
* @lock:
- * @pool: initial pool of iwl_rx_mem_buffer for the queue
- * @queue: actual rx queue
+ * @queue: actual rx queue. Not used for multi-rx queue.
*
* NOTE: rx_free and rx_used are used as a FIFO for iwl_rx_mem_buffers
*/
struct iwl_rxq {
- __le32 *bd;
+ int id;
+ void *bd;
dma_addr_t bd_dma;
+ __le32 *used_bd;
+ dma_addr_t used_bd_dma;
u32 read;
u32 write;
u32 free_count;
u32 used_count;
u32 write_actual;
+ u32 queue_size;
struct list_head rx_free;
struct list_head rx_used;
bool need_update;
struct iwl_rb_status *rb_stts;
dma_addr_t rb_stts_dma;
spinlock_t lock;
- struct iwl_rx_mem_buffer pool[RX_QUEUE_SIZE];
+ struct napi_struct napi;
struct iwl_rx_mem_buffer *queue[RX_QUEUE_SIZE];
};
/**
* struct iwl_rb_allocator - Rx allocator
- * @pool: initial pool of allocator
* @req_pending: number of requests the allocator had not processed yet
* @req_ready: number of requests honored and ready for claiming
* @rbd_allocated: RBDs with pages allocated and ready to be handled to
@@@ -153,6 -140,7 +153,6 @@@
* @rx_alloc: work struct for background calls
*/
struct iwl_rb_allocator {
- struct iwl_rx_mem_buffer pool[RX_POOL_SIZE];
atomic_t req_pending;
atomic_t req_ready;
struct list_head rbd_allocated;
@@@ -292,7 -280,6 +292,7 @@@ struct iwl_txq
bool ampdu;
bool block;
unsigned long wd_timeout;
+ struct sk_buff_head overflow_q;
};
static inline dma_addr_t
@@@ -310,8 -297,6 +310,8 @@@ struct iwl_tso_hdr_page
/**
* struct iwl_trans_pcie - PCIe transport specific data
* @rxq: all the RX queue data
+ * @rx_pool: initial pool of iwl_rx_mem_buffer for all the queues
+ * @global_table: table mapping received VID from hw to rxb
* @rba: allocator for RX replenishing
* @drv - pointer to iwl_drv
* @trans: pointer to the generic transport area
@@@ -338,14 -323,13 +338,14 @@@
* @fw_mon_size: size of the buffer for the firmware monitor
*/
struct iwl_trans_pcie {
- struct iwl_rxq rxq;
+ struct iwl_rxq *rxq;
+ struct iwl_rx_mem_buffer rx_pool[MQ_RX_POOL_SIZE];
+ struct iwl_rx_mem_buffer *global_table[MQ_RX_TABLE_SIZE];
struct iwl_rb_allocator rba;
struct iwl_trans *trans;
struct iwl_drv *drv;
struct net_device napi_dev;
- struct napi_struct napi;
struct __percpu iwl_tso_hdr_page *tso_hdr_page;
@@@ -375,7 -359,6 +375,7 @@@
bool ucode_write_complete;
wait_queue_head_t ucode_write_waitq;
wait_queue_head_t wait_command_queue;
+ wait_queue_head_t d0i3_waitq;
u8 cmd_queue;
u8 cmd_fifo;
@@@ -507,6 -490,15 +507,15 @@@ static inline void iwl_enable_interrupt
iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
}
+ static inline void iwl_enable_fw_load_int(struct iwl_trans *trans)
+ {
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+ IWL_DEBUG_ISR(trans, "Enabling FW load interrupt\n");
+ trans_pcie->inta_mask = CSR_INT_BIT_FH_TX;
+ iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
+ }
+
static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@@ -596,7 -588,4 +605,7 @@@ static inline int iwl_trans_pcie_dbgfs_
}
#endif
+int iwl_pci_fw_exit_d0i3(struct iwl_trans *trans);
+int iwl_pci_fw_enter_d0i3(struct iwl_trans *trans);
+
#endif /* __iwl_trans_int_pcie_h__ */
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 51314e56,152cf9a..07973ef
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@@ -2,7 -2,6 +2,7 @@@
*
* Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
*
* Portions of this file are derived from the ipw3945 project, as well
* as portions of the ieee80211 subsystem header files.
@@@ -141,8 -140,8 +141,8 @@@
*/
static int iwl_rxq_space(const struct iwl_rxq *rxq)
{
- /* Make sure RX_QUEUE_SIZE is a power of 2 */
- BUILD_BUG_ON(RX_QUEUE_SIZE & (RX_QUEUE_SIZE - 1));
+ /* Make sure rx queue size is a power of 2 */
+ WARN_ON(rxq->queue_size & (rxq->queue_size - 1));
/*
* There can be up to (RX_QUEUE_SIZE - 1) free slots, to avoid ambiguity
@@@ -150,7 -149,7 +150,7 @@@
* The following is equivalent to modulo by RX_QUEUE_SIZE and is well
* defined for negative dividends.
*/
- return (rxq->read - rxq->write - 1) & (RX_QUEUE_SIZE - 1);
+ return (rxq->read - rxq->write - 1) & (rxq->queue_size - 1);
}
/*
@@@ -161,12 -160,6 +161,12 @@@ static inline __le32 iwl_pcie_dma_addr2
return cpu_to_le32((u32)(dma_addr >> 8));
}
+static void iwl_pcie_write_prph_64(struct iwl_trans *trans, u64 ofs, u64 val)
+{
+ iwl_write_prph(trans, ofs, val & 0xffffffff);
+ iwl_write_prph(trans, ofs + 4, val >> 32);
+}
+
/*
* iwl_pcie_rx_stop - stops the Rx DMA
*/
@@@ -180,9 -173,10 +180,9 @@@ int iwl_pcie_rx_stop(struct iwl_trans *
/*
* iwl_pcie_rxq_inc_wr_ptr - Update the write pointer for the RX queue
*/
-static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans)
+static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans,
+ struct iwl_rxq *rxq)
{
- struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
u32 reg;
lockdep_assert_held(&rxq->lock);
@@@ -207,73 -201,24 +207,73 @@@
}
rxq->write_actual = round_down(rxq->write, 8);
- iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual);
+ if (trans->cfg->mq_rx_supported)
+ iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(rxq->id),
+ rxq->write_actual);
+ else
+ iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual);
}
static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
+ int i;
- spin_lock(&rxq->lock);
+ for (i = 0; i < trans->num_rx_queues; i++) {
+ struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+ if (!rxq->need_update)
+ continue;
+ spin_lock(&rxq->lock);
+ iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
+ rxq->need_update = false;
+ spin_unlock(&rxq->lock);
+ }
+}
+
+static void iwl_pcie_rxq_mq_restock(struct iwl_trans *trans,
+ struct iwl_rxq *rxq)
+{
+ struct iwl_rx_mem_buffer *rxb;
+
+ /*
+ * If the device isn't enabled - no need to try to add buffers...
+ * This can happen when we stop the device and still have an interrupt
+ * pending. We stop the APM before we sync the interrupts because we
+ * have to (see comment there). On the other hand, since the APM is
+ * stopped, we cannot access the HW (in particular not prph).
+ * So don't try to restock if the APM has been already stopped.
+ */
+ if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status))
+ return;
- if (!rxq->need_update)
- goto exit_unlock;
+ spin_lock(&rxq->lock);
+ while (rxq->free_count) {
+ __le64 *bd = (__le64 *)rxq->bd;
- iwl_pcie_rxq_inc_wr_ptr(trans);
- rxq->need_update = false;
+ /* Get next free Rx buffer, remove from free list */
+ rxb = list_first_entry(&rxq->rx_free, struct iwl_rx_mem_buffer,
+ list);
+ list_del(&rxb->list);
- exit_unlock:
+ /* first 12 bits are expected to be empty */
+ WARN_ON(rxb->page_dma & DMA_BIT_MASK(12));
+ /* Point to Rx buffer via next RBD in circular buffer */
+ bd[rxq->write] = cpu_to_le64(rxb->page_dma | rxb->vid);
+ rxq->write = (rxq->write + 1) & MQ_RX_TABLE_MASK;
+ rxq->free_count--;
+ }
spin_unlock(&rxq->lock);
+
+ /*
+ * If we've added more space for the firmware to place data, tell it.
+ * Increment device's write pointer in multiples of 8.
+ */
+ if (rxq->write_actual != (rxq->write & ~0x7)) {
+ spin_lock(&rxq->lock);
+ iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
+ spin_unlock(&rxq->lock);
+ }
}
/*
@@@ -287,8 -232,10 +287,8 @@@
* also updates the memory address in the firmware to reference the new
* target buffer.
*/
-static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
+static void iwl_pcie_rxq_restock(struct iwl_trans *trans, struct iwl_rxq *rxq)
{
- struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
struct iwl_rx_mem_buffer *rxb;
/*
@@@ -304,7 -251,6 +304,7 @@@
spin_lock(&rxq->lock);
while ((iwl_rxq_space(rxq) > 0) && (rxq->free_count)) {
+ __le32 *bd = (__le32 *)rxq->bd;
/* The overwritten rxb must be a used one */
rxb = rxq->queue[rxq->write];
BUG_ON(rxb && rxb->page);
@@@ -315,7 -261,7 +315,7 @@@
list_del(&rxb->list);
/* Point to Rx buffer via next RBD in circular buffer */
- rxq->bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma);
+ bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma);
rxq->queue[rxq->write] = rxb;
rxq->write = (rxq->write + 1) & RX_QUEUE_MASK;
rxq->free_count--;
@@@ -326,7 -272,7 +326,7 @@@
* Increment device's write pointer in multiples of 8. */
if (rxq->write_actual != (rxq->write & ~0x7)) {
spin_lock(&rxq->lock);
- iwl_pcie_rxq_inc_wr_ptr(trans);
+ iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
spin_unlock(&rxq->lock);
}
}
@@@ -339,9 -285,13 +339,9 @@@ static struct page *iwl_pcie_rx_alloc_p
gfp_t priority)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
struct page *page;
gfp_t gfp_mask = priority;
- if (rxq->free_count > RX_LOW_WATERMARK)
- gfp_mask |= __GFP_NOWARN;
-
if (trans_pcie->rx_page_order > 0)
gfp_mask |= __GFP_COMP;
@@@ -351,13 -301,16 +351,13 @@@
if (net_ratelimit())
IWL_DEBUG_INFO(trans, "alloc_pages failed, order: %d\n",
trans_pcie->rx_page_order);
- /* Issue an error if the hardware has consumed more than half
- * of its free buffer list and we don't have enough
- * pre-allocated buffers.
+ /*
+ * Issue an error if we don't have enough pre-allocated
+ * buffers.
*/
- if (rxq->free_count <= RX_LOW_WATERMARK &&
- iwl_rxq_space(rxq) > (RX_QUEUE_SIZE / 2) &&
- net_ratelimit())
+ if (!(gfp_mask & __GFP_NOWARN) && net_ratelimit())
IWL_CRIT(trans,
- "Failed to alloc_pages with GFP_KERNEL. Only %u free buffers remaining.\n",
- rxq->free_count);
+ "Failed to alloc_pages\n");
return NULL;
}
return page;
@@@ -372,10 -325,10 +372,10 @@@
* iwl_pcie_rxq_restock. The latter function will update the HW to use the newly
* allocated buffers.
*/
-static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority)
+static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority,
+ struct iwl_rxq *rxq)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
struct iwl_rx_mem_buffer *rxb;
struct page *page;
@@@ -419,6 -372,10 +419,6 @@@
__free_pages(page, trans_pcie->rx_page_order);
return;
}
- /* dma address must be no more than 36 bits */
- BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36));
- /* and also 256 byte aligned! */
- BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
spin_lock(&rxq->lock);
@@@ -429,24 -386,41 +429,24 @@@
}
}
-static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans)
+static void iwl_pcie_free_rbs_pool(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
int i;
- lockdep_assert_held(&rxq->lock);
-
- for (i = 0; i < RX_QUEUE_SIZE; i++) {
- if (!rxq->pool[i].page)
+ for (i = 0; i < MQ_RX_POOL_SIZE; i++) {
+ if (!trans_pcie->rx_pool[i].page)
continue;
- dma_unmap_page(trans->dev, rxq->pool[i].page_dma,
+ dma_unmap_page(trans->dev, trans_pcie->rx_pool[i].page_dma,
PAGE_SIZE << trans_pcie->rx_page_order,
DMA_FROM_DEVICE);
- __free_pages(rxq->pool[i].page, trans_pcie->rx_page_order);
- rxq->pool[i].page = NULL;
+ __free_pages(trans_pcie->rx_pool[i].page,
+ trans_pcie->rx_page_order);
+ trans_pcie->rx_pool[i].page = NULL;
}
}
/*
- * iwl_pcie_rx_replenish - Move all used buffers from rx_used to rx_free
- *
- * When moving to rx_free an page is allocated for the slot.
- *
- * Also restock the Rx queue via iwl_pcie_rxq_restock.
- * This is called only during initialization
- */
-static void iwl_pcie_rx_replenish(struct iwl_trans *trans)
-{
- iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL);
-
- iwl_pcie_rxq_restock(trans);
-}
-
-/*
* iwl_pcie_rx_allocator - Allocates pages in the background for RX queues
*
* Allocates 8 pages for each received request
@@@ -470,11 -444,6 +470,11 @@@ static void iwl_pcie_rx_allocator(struc
while (pending) {
int i;
struct list_head local_allocated;
+ gfp_t gfp_mask = GFP_KERNEL;
+
+ /* Do not post a warning if there are only a few requests */
+ if (pending < RX_PENDING_WATERMARK)
+ gfp_mask |= __GFP_NOWARN;
INIT_LIST_HEAD(&local_allocated);
@@@ -494,7 -463,7 +494,7 @@@
BUG_ON(rxb->page);
/* Alloc a new receive buffer */
- page = iwl_pcie_rx_alloc_page(trans, GFP_KERNEL);
+ page = iwl_pcie_rx_alloc_page(trans, gfp_mask);
if (!page)
continue;
rxb->page = page;
@@@ -508,6 -477,10 +508,6 @@@
__free_pages(page, trans_pcie->rx_page_order);
continue;
}
- /* dma address must be no more than 36 bits */
- BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36));
- /* and also 256 byte aligned! */
- BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
/* move the allocated entry to the out list */
list_move(&rxb->list, &local_allocated);
@@@ -588,83 -561,38 +588,83 @@@ static void iwl_pcie_rx_allocator_work(
static int iwl_pcie_rx_alloc(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
struct iwl_rb_allocator *rba = &trans_pcie->rba;
struct device *dev = trans->dev;
+ int i;
+ int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) :
+ sizeof(__le32);
+
+ if (WARN_ON(trans_pcie->rxq))
+ return -EINVAL;
- memset(&trans_pcie->rxq, 0, sizeof(trans_pcie->rxq));
+ trans_pcie->rxq = kcalloc(trans->num_rx_queues, sizeof(struct iwl_rxq),
+ GFP_KERNEL);
+ if (!trans_pcie->rxq)
+ return -EINVAL;
- spin_lock_init(&rxq->lock);
spin_lock_init(&rba->lock);
- if (WARN_ON(rxq->bd || rxq->rb_stts))
- return -EINVAL;
+ for (i = 0; i < trans->num_rx_queues; i++) {
+ struct iwl_rxq *rxq = &trans_pcie->rxq[i];
- /* Allocate the circular buffer of Read Buffer Descriptors (RBDs) */
- rxq->bd = dma_zalloc_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE,
- &rxq->bd_dma, GFP_KERNEL);
- if (!rxq->bd)
- goto err_bd;
+ spin_lock_init(&rxq->lock);
+ if (trans->cfg->mq_rx_supported)
+ rxq->queue_size = MQ_RX_TABLE_SIZE;
+ else
+ rxq->queue_size = RX_QUEUE_SIZE;
- /*Allocate the driver's pointer to receive buffer status */
- rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts),
- &rxq->rb_stts_dma, GFP_KERNEL);
- if (!rxq->rb_stts)
- goto err_rb_stts;
+ /*
+ * Allocate the circular buffer of Read Buffer Descriptors
+ * (RBDs)
+ */
+ rxq->bd = dma_zalloc_coherent(dev,
+ free_size * rxq->queue_size,
+ &rxq->bd_dma, GFP_KERNEL);
+ if (!rxq->bd)
+ goto err;
+
+ if (trans->cfg->mq_rx_supported) {
+ rxq->used_bd = dma_zalloc_coherent(dev,
+ sizeof(__le32) *
+ rxq->queue_size,
+ &rxq->used_bd_dma,
+ GFP_KERNEL);
+ if (!rxq->used_bd)
+ goto err;
+ }
+ /* Allocate the driver's pointer to receive buffer status */
+ rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts),
+ &rxq->rb_stts_dma,
+ GFP_KERNEL);
+ if (!rxq->rb_stts)
+ goto err;
+ }
return 0;
-err_rb_stts:
- dma_free_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE,
- rxq->bd, rxq->bd_dma);
- rxq->bd_dma = 0;
- rxq->bd = NULL;
-err_bd:
+err:
+ for (i = 0; i < trans->num_rx_queues; i++) {
+ struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+ if (rxq->bd)
+ dma_free_coherent(dev, free_size * rxq->queue_size,
+ rxq->bd, rxq->bd_dma);
+ rxq->bd_dma = 0;
+ rxq->bd = NULL;
+
+ if (rxq->rb_stts)
+ dma_free_coherent(trans->dev,
+ sizeof(struct iwl_rb_status),
+ rxq->rb_stts, rxq->rb_stts_dma);
+
+ if (rxq->used_bd)
+ dma_free_coherent(dev, sizeof(__le32) * rxq->queue_size,
+ rxq->used_bd, rxq->used_bd_dma);
+ rxq->used_bd_dma = 0;
+ rxq->used_bd = NULL;
+ }
+ kfree(trans_pcie->rxq);
+
return -ENOMEM;
}
@@@ -731,103 -659,65 +731,103 @@@ static void iwl_pcie_rx_hw_init(struct
iwl_set_bit(trans, CSR_INT_COALESCING, IWL_HOST_INT_OPER_MODE);
}
-static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq)
+static void iwl_pcie_rx_mq_hw_init(struct iwl_trans *trans)
{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ u32 rb_size, enabled = 0;
int i;
- lockdep_assert_held(&rxq->lock);
-
- INIT_LIST_HEAD(&rxq->rx_free);
- INIT_LIST_HEAD(&rxq->rx_used);
- rxq->free_count = 0;
- rxq->used_count = 0;
+ switch (trans_pcie->rx_buf_size) {
+ case IWL_AMSDU_4K:
+ rb_size = RFH_RXF_DMA_RB_SIZE_4K;
+ break;
+ case IWL_AMSDU_8K:
+ rb_size = RFH_RXF_DMA_RB_SIZE_8K;
+ break;
+ case IWL_AMSDU_12K:
+ rb_size = RFH_RXF_DMA_RB_SIZE_12K;
+ break;
+ default:
+ WARN_ON(1);
+ rb_size = RFH_RXF_DMA_RB_SIZE_4K;
+ }
- for (i = 0; i < RX_QUEUE_SIZE; i++)
- list_add(&rxq->pool[i].list, &rxq->rx_used);
-}
+ /* Stop Rx DMA */
+ iwl_write_prph(trans, RFH_RXF_DMA_CFG, 0);
+ /* disable free and used rx queue operation */
+ iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, 0);
+
+ for (i = 0; i < trans->num_rx_queues; i++) {
+ /* Tell device where to find RBD free table in DRAM */
+ iwl_pcie_write_prph_64(trans, RFH_Q_FRBDCB_BA_LSB(i),
+ (u64)(trans_pcie->rxq[i].bd_dma));
+ /* Tell device where to find RBD used table in DRAM */
+ iwl_pcie_write_prph_64(trans, RFH_Q_URBDCB_BA_LSB(i),
+ (u64)(trans_pcie->rxq[i].used_bd_dma));
+ /* Tell device where in DRAM to update its Rx status */
+ iwl_pcie_write_prph_64(trans, RFH_Q_URBD_STTS_WPTR_LSB(i),
+ trans_pcie->rxq[i].rb_stts_dma);
+ /* Reset device index tables */
+ iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(i), 0);
+ iwl_write_prph(trans, RFH_Q_FRBDCB_RIDX(i), 0);
+ iwl_write_prph(trans, RFH_Q_URBDCB_WIDX(i), 0);
+
+ enabled |= BIT(i) | BIT(i + 16);
+ }
-static void iwl_pcie_rx_init_rba(struct iwl_rb_allocator *rba)
-{
- int i;
+ /* restock default queue */
+ iwl_pcie_rxq_mq_restock(trans, &trans_pcie->rxq[0]);
- lockdep_assert_held(&rba->lock);
+ /*
+ * Enable Rx DMA
+ * Single frame mode
+ * Rx buffer size 4k, 8k or 12k
+ * Min RB size 4 or 8
+ * 512 RBDs
+ */
+ iwl_write_prph(trans, RFH_RXF_DMA_CFG,
+ RFH_DMA_EN_ENABLE_VAL |
+ rb_size | RFH_RXF_DMA_SINGLE_FRAME_MASK |
+ RFH_RXF_DMA_MIN_RB_4_8 |
+ RFH_RXF_DMA_RBDCB_SIZE_512);
- INIT_LIST_HEAD(&rba->rbd_allocated);
- INIT_LIST_HEAD(&rba->rbd_empty);
+ iwl_write_prph(trans, RFH_GEN_CFG, RFH_GEN_CFG_RFH_DMA_SNOOP |
+ RFH_GEN_CFG_SERVICE_DMA_SNOOP);
+ iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, enabled);
- for (i = 0; i < RX_POOL_SIZE; i++)
- list_add(&rba->pool[i].list, &rba->rbd_empty);
+ /* Set interrupt coalescing timer to default (2048 usecs) */
+ iwl_write8(trans, CSR_INT_COALESCING, IWL_HOST_INT_TIMEOUT_DEF);
}
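
The iwl_pcie_write_prph_64() helper used above is assumed to split a 64-bit DMA address across an LSB/MSB register pair; a hedged sketch of that assumption, not the driver's exact implementation:

/* Assumed behaviour: two 32-bit prph accesses, low word first, at
 * consecutive register offsets. */
static void write_prph_64_sketch(struct iwl_trans *trans, u32 ofs, u64 val)
{
	iwl_write_prph(trans, ofs, val & 0xffffffff);
	iwl_write_prph(trans, ofs + 4, val >> 32);
}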
-static void iwl_pcie_rx_free_rba(struct iwl_trans *trans)
+static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq)
{
- struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rb_allocator *rba = &trans_pcie->rba;
- int i;
+ lockdep_assert_held(&rxq->lock);
- lockdep_assert_held(&rba->lock);
+ INIT_LIST_HEAD(&rxq->rx_free);
+ INIT_LIST_HEAD(&rxq->rx_used);
+ rxq->free_count = 0;
+ rxq->used_count = 0;
+}
- for (i = 0; i < RX_POOL_SIZE; i++) {
- if (!rba->pool[i].page)
- continue;
- dma_unmap_page(trans->dev, rba->pool[i].page_dma,
- PAGE_SIZE << trans_pcie->rx_page_order,
- DMA_FROM_DEVICE);
- __free_pages(rba->pool[i].page, trans_pcie->rx_page_order);
- rba->pool[i].page = NULL;
- }
+static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget)
+{
+ WARN_ON(1);
+ return 0;
}
int iwl_pcie_rx_init(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
+ struct iwl_rxq *def_rxq;
struct iwl_rb_allocator *rba = &trans_pcie->rba;
- int i, err;
+ int i, err, num_rbds, allocator_pool_size;
- if (!rxq->bd) {
+ if (!trans_pcie->rxq) {
err = iwl_pcie_rx_alloc(trans);
if (err)
return err;
}
+ def_rxq = trans_pcie->rxq;
if (!rba->alloc_wq)
rba->alloc_wq = alloc_workqueue("rb_allocator",
WQ_HIGHPRI | WQ_UNBOUND, 1);
@@@ -836,68 -726,34 +836,68 @@@
spin_lock(&rba->lock);
atomic_set(&rba->req_pending, 0);
atomic_set(&rba->req_ready, 0);
- /* free all first - we might be reconfigured for a different size */
- iwl_pcie_rx_free_rba(trans);
- iwl_pcie_rx_init_rba(rba);
+ INIT_LIST_HEAD(&rba->rbd_allocated);
+ INIT_LIST_HEAD(&rba->rbd_empty);
spin_unlock(&rba->lock);
- spin_lock(&rxq->lock);
-
/* free all first - we might be reconfigured for a different size */
- iwl_pcie_rxq_free_rbs(trans);
- iwl_pcie_rx_init_rxb_lists(rxq);
+ iwl_pcie_free_rbs_pool(trans);
for (i = 0; i < RX_QUEUE_SIZE; i++)
- rxq->queue[i] = NULL;
+ def_rxq->queue[i] = NULL;
- /* Set us so that we have processed and used all buffers, but have
- * not restocked the Rx queue with fresh buffers */
- rxq->read = rxq->write = 0;
- rxq->write_actual = 0;
- memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
- spin_unlock(&rxq->lock);
+ for (i = 0; i < trans->num_rx_queues; i++) {
+ struct iwl_rxq *rxq = &trans_pcie->rxq[i];
- iwl_pcie_rx_replenish(trans);
+ rxq->id = i;
- iwl_pcie_rx_hw_init(trans, rxq);
+ spin_lock(&rxq->lock);
+ /*
+ * Set read write pointer to reflect that we have processed
+ * and used all buffers, but have not restocked the Rx queue
+ * with fresh buffers
+ */
+ rxq->read = 0;
+ rxq->write = 0;
+ rxq->write_actual = 0;
+ memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
- spin_lock(&rxq->lock);
- iwl_pcie_rxq_inc_wr_ptr(trans);
- spin_unlock(&rxq->lock);
+ iwl_pcie_rx_init_rxb_lists(rxq);
+
+ if (!rxq->napi.poll)
+ netif_napi_add(&trans_pcie->napi_dev, &rxq->napi,
+ iwl_pcie_dummy_napi_poll, 64);
+
+ spin_unlock(&rxq->lock);
+ }
+
+ /* move the pool to the default queue and allocator ownerships */
+ num_rbds = trans->cfg->mq_rx_supported ?
+ MQ_RX_POOL_SIZE : RX_QUEUE_SIZE;
+ allocator_pool_size = trans->num_rx_queues *
+ (RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC);
+ for (i = 0; i < num_rbds; i++) {
+ struct iwl_rx_mem_buffer *rxb = &trans_pcie->rx_pool[i];
+
+ if (i < allocator_pool_size)
+ list_add(&rxb->list, &rba->rbd_empty);
+ else
+ list_add(&rxb->list, &def_rxq->rx_used);
+ trans_pcie->global_table[i] = rxb;
+ rxb->vid = (u16)i;
+ }
+
+ iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL, def_rxq);
+ if (trans->cfg->mq_rx_supported) {
+ iwl_pcie_rx_mq_hw_init(trans);
+ } else {
+ iwl_pcie_rxq_restock(trans, def_rxq);
+ iwl_pcie_rx_hw_init(trans, def_rxq);
+ }
+
+ spin_lock(&def_rxq->lock);
+ iwl_pcie_rxq_inc_wr_ptr(trans, def_rxq);
+ spin_unlock(&def_rxq->lock);
return 0;
}
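
The vid/global_table pairing set up above decouples buffer ownership from any particular queue. A self-contained sketch of the idea, with illustrative names and a fixed pool size standing in for MQ_RX_POOL_SIZE:

#define POOL_SIZE 512	/* illustrative only */

struct rxb_sketch {
	u16 vid;	/* virtual id echoed back by the device */
	void *page;
};

static struct rxb_sketch pool[POOL_SIZE];
static struct rxb_sketch *vid_table[POOL_SIZE];

/* At init time each pool entry is tagged with its own index, so the
 * 12-bit id the device writes into the used-BD ring recovers the
 * buffer with one lookup, regardless of which RX queue completed it. */
static void init_vids(void)
{
	int i;

	for (i = 0; i < POOL_SIZE; i++) {
		vid_table[i] = &pool[i];
		pool[i].vid = (u16)i;
	}
}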
@@@ -905,16 -761,12 +905,16 @@@
void iwl_pcie_rx_free(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
struct iwl_rb_allocator *rba = &trans_pcie->rba;
+ int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) :
+ sizeof(__le32);
+ int i;
- /*if rxq->bd is NULL, it means that nothing has been allocated,
- * exit now */
- if (!rxq->bd) {
+ /*
+ * if rxq is NULL, it means that nothing has been allocated,
+ * exit now
+ */
+ if (!trans_pcie->rxq) {
IWL_DEBUG_INFO(trans, "Free NULL rx context\n");
return;
}
@@@ -925,37 -777,27 +925,37 @@@
rba->alloc_wq = NULL;
}
- spin_lock(&rba->lock);
- iwl_pcie_rx_free_rba(trans);
- spin_unlock(&rba->lock);
-
- spin_lock(&rxq->lock);
- iwl_pcie_rxq_free_rbs(trans);
- spin_unlock(&rxq->lock);
-
- dma_free_coherent(trans->dev, sizeof(__le32) * RX_QUEUE_SIZE,
- rxq->bd, rxq->bd_dma);
- rxq->bd_dma = 0;
- rxq->bd = NULL;
-
- if (rxq->rb_stts)
- dma_free_coherent(trans->dev,
- sizeof(struct iwl_rb_status),
- rxq->rb_stts, rxq->rb_stts_dma);
- else
- IWL_DEBUG_INFO(trans, "Free rxq->rb_stts which is NULL\n");
- rxq->rb_stts_dma = 0;
- rxq->rb_stts = NULL;
+ iwl_pcie_free_rbs_pool(trans);
+
+ for (i = 0; i < trans->num_rx_queues; i++) {
+ struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+ if (rxq->bd)
+ dma_free_coherent(trans->dev,
+ free_size * rxq->queue_size,
+ rxq->bd, rxq->bd_dma);
+ rxq->bd_dma = 0;
+ rxq->bd = NULL;
+
+ if (rxq->rb_stts)
+ dma_free_coherent(trans->dev,
+ sizeof(struct iwl_rb_status),
+ rxq->rb_stts, rxq->rb_stts_dma);
+ else
+ IWL_DEBUG_INFO(trans,
+ "Free rxq->rb_stts which is NULL\n");
+
+ if (rxq->used_bd)
+ dma_free_coherent(trans->dev,
+ sizeof(__le32) * rxq->queue_size,
+ rxq->used_bd, rxq->used_bd_dma);
+ rxq->used_bd_dma = 0;
+ rxq->used_bd = NULL;
+
+ if (rxq->napi.poll)
+ netif_napi_del(&rxq->napi);
+ }
+ kfree(trans_pcie->rxq);
}
/*
@@@ -999,11 -841,11 +999,11 @@@ static void iwl_pcie_rx_reuse_rbd(struc
}
static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
+ struct iwl_rxq *rxq,
struct iwl_rx_mem_buffer *rxb,
bool emergency)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
bool page_stolen = false;
int max_len = PAGE_SIZE << trans_pcie->rx_page_order;
@@@ -1069,12 -911,7 +1069,12 @@@
index = SEQ_TO_INDEX(sequence);
cmd_index = get_cmd_index(&txq->q, index);
- iwl_op_mode_rx(trans->op_mode, &trans_pcie->napi, &rxcb);
+ if (rxq->id == 0)
+ iwl_op_mode_rx(trans->op_mode, &rxq->napi,
+ &rxcb);
+ else
+ iwl_op_mode_rx_rss(trans->op_mode, &rxq->napi,
+ &rxcb, rxq->id);
if (reclaim) {
kzfree(txq->entries[cmd_index].free_buf);
@@@ -1138,7 -975,7 +1138,7 @@@
static void iwl_pcie_rx_handle(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
+ struct iwl_rxq *rxq = &trans_pcie->rxq[0];
u32 r, i, j, count = 0;
bool emergency = false;
@@@ -1156,26 -993,16 +1156,26 @@@ restart
while (i != r) {
struct iwl_rx_mem_buffer *rxb;
- if (unlikely(rxq->used_count == RX_QUEUE_SIZE / 2))
+ if (unlikely(rxq->used_count == rxq->queue_size / 2))
emergency = true;
- rxb = rxq->queue[i];
- rxq->queue[i] = NULL;
+ if (trans->cfg->mq_rx_supported) {
+ /*
+ * used_bd entries are 32 bits wide, but only the low 12 bits
+ * carry the vid
+ */
+ u16 vid = (u16)le32_to_cpu(rxq->used_bd[i]);
+
+ rxb = trans_pcie->global_table[vid];
+ } else {
+ rxb = rxq->queue[i];
+ rxq->queue[i] = NULL;
+ }
IWL_DEBUG_RX(trans, "rxbuf: HW = %d, SW = %d\n", r, i);
- iwl_pcie_rx_handle_rb(trans, rxb, emergency);
+ iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency);
- i = (i + 1) & RX_QUEUE_MASK;
+ i = (i + 1) & (rxq->queue_size - 1);
/* If we have RX_CLAIM_REQ_ALLOC released rx buffers -
* try to claim the pre-allocated buffers from the allocator */
@@@ -1213,10 -1040,10 +1213,10 @@@
count++;
if (count == 8) {
count = 0;
- if (rxq->used_count < RX_QUEUE_SIZE / 3)
+ if (rxq->used_count < rxq->queue_size / 3)
emergency = false;
spin_unlock(&rxq->lock);
- iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
+ iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq);
spin_lock(&rxq->lock);
}
}
@@@ -1228,10 -1055,7 +1228,10 @@@
if (rxq->free_count >= RX_CLAIM_REQ_ALLOC) {
rxq->read = i;
spin_unlock(&rxq->lock);
- iwl_pcie_rxq_restock(trans);
+ if (trans->cfg->mq_rx_supported)
+ iwl_pcie_rxq_mq_restock(trans, rxq);
+ else
+ iwl_pcie_rxq_restock(trans, rxq);
goto restart;
}
}
@@@ -1253,10 -1077,10 +1253,10 @@@
* will be restocked by the next call of iwl_pcie_rxq_restock.
*/
if (unlikely(emergency && count))
- iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
+ iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq);
- if (trans_pcie->napi.poll)
- napi_gro_flush(&trans_pcie->napi, false);
+ if (rxq->napi.poll)
+ napi_gro_flush(&rxq->napi, false);
}
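
Note how the fixed RX_QUEUE_MASK gives way to (rxq->queue_size - 1) above, now that legacy and MQ rings differ in size. The masking trick only holds for power-of-two sizes; sketched for illustration:

/* Ring index advance for a power-of-two sized ring: 512-entry MQ
 * rings and 256-entry legacy rings both satisfy the constraint. */
static u32 ring_next(u32 i, u32 queue_size)
{
	return (i + 1) & (queue_size - 1);
}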
/*
@@@ -1614,9 -1438,11 +1614,11 @@@ irqreturn_t iwl_pcie_irq_handler(int ir
inta & ~trans_pcie->inta_mask);
}
- /* Re-enable all interrupts */
- /* only Re-enable if disabled by irq */
- if (test_bit(STATUS_INT_ENABLED, &trans->status))
+ /* we are loading the firmware, enable FH_TX interrupt only */
+ if (handled & CSR_INT_BIT_FH_TX)
+ iwl_enable_fw_load_int(trans);
+ /* only re-enable all interrupts if disabled by irq */
+ else if (test_bit(STATUS_INT_ENABLED, &trans->status))
iwl_enable_interrupts(trans);
/* Re-enable RF_KILL if it occurred */
else if (handled & CSR_INT_BIT_RF_KILL)
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index b796952,5a854c6..58591ca
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@@ -72,7 -72,6 +72,7 @@@
#include <linux/bitops.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
+#include <linux/pm_runtime.h>
#include "iwl-drv.h"
#include "iwl-trans.h"
@@@ -1022,82 -1021,6 +1022,6 @@@ static int iwl_pcie_load_given_ucode_80
&first_ucode_section);
}
- static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
- const struct fw_img *fw, bool run_in_rfkill)
- {
- struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- bool hw_rfkill;
- int ret;
-
- mutex_lock(&trans_pcie->mutex);
-
- /* Someone called stop_device, don't try to start_fw */
- if (trans_pcie->is_down) {
- IWL_WARN(trans,
- "Can't start_fw since the HW hasn't been started\n");
- ret = EIO;
- goto out;
- }
-
- /* This may fail if AMT took ownership of the device */
- if (iwl_pcie_prepare_card_hw(trans)) {
- IWL_WARN(trans, "Exit HW not ready\n");
- ret = -EIO;
- goto out;
- }
-
- iwl_enable_rfkill_int(trans);
-
- /* If platform's RF_KILL switch is NOT set to KILL */
- hw_rfkill = iwl_is_rfkill_set(trans);
- if (hw_rfkill)
- set_bit(STATUS_RFKILL, &trans->status);
- else
- clear_bit(STATUS_RFKILL, &trans->status);
- iwl_trans_pcie_rf_kill(trans, hw_rfkill);
- if (hw_rfkill && !run_in_rfkill) {
- ret = -ERFKILL;
- goto out;
- }
-
- iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-
- ret = iwl_pcie_nic_init(trans);
- if (ret) {
- IWL_ERR(trans, "Unable to init nic\n");
- goto out;
- }
-
- /* make sure rfkill handshake bits are cleared */
- iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
- iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
- CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
-
- /* clear (again), then enable host interrupts */
- iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
- iwl_enable_interrupts(trans);
-
- /* really make sure rfkill handshake bits are cleared */
- iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
- iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-
- /* Load the given image to the HW */
- if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
- ret = iwl_pcie_load_given_ucode_8000(trans, fw);
- else
- ret = iwl_pcie_load_given_ucode(trans, fw);
-
- out:
- mutex_unlock(&trans_pcie->mutex);
- return ret;
- }
-
- static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
- {
- iwl_pcie_reset_ict(trans);
- iwl_pcie_tx_start(trans, scd_addr);
- }
-
static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@@ -1128,7 -1051,8 +1052,8 @@@
* already dead.
*/
if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
- IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n");
+ IWL_DEBUG_INFO(trans,
+ "DEVICE_ENABLED bit was set and is now cleared\n");
iwl_pcie_tx_stop(trans);
iwl_pcie_rx_stop(trans);
@@@ -1162,7 -1086,6 +1087,6 @@@
iwl_disable_interrupts(trans);
spin_unlock(&trans_pcie->irq_lock);
-
/* clear all status bits */
clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
clear_bit(STATUS_INT_ENABLED, &trans->status);
@@@ -1195,10 -1118,116 +1119,116 @@@
if (hw_rfkill != was_hw_rfkill)
iwl_trans_pcie_rf_kill(trans, hw_rfkill);
- /* re-take ownership to prevent other users from stealing the deivce */
+ /* re-take ownership to prevent other users from stealing the device */
iwl_pcie_prepare_card_hw(trans);
}
+ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
+ const struct fw_img *fw, bool run_in_rfkill)
+ {
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ bool hw_rfkill;
+ int ret;
+
+ /* This may fail if AMT took ownership of the device */
+ if (iwl_pcie_prepare_card_hw(trans)) {
+ IWL_WARN(trans, "Exit HW not ready\n");
+ ret = -EIO;
+ goto out;
+ }
+
+ iwl_enable_rfkill_int(trans);
+
+ iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+ /*
+ * We enabled the RF-Kill interrupt and the handler may very
+ * well be running. Disable the interrupts to make sure no other
+ * interrupt can be fired.
+ */
+ iwl_disable_interrupts(trans);
+
+ /* Make sure it finished running */
+ synchronize_irq(trans_pcie->pci_dev->irq);
+
+ mutex_lock(&trans_pcie->mutex);
+
+ /* If platform's RF_KILL switch is NOT set to KILL */
+ hw_rfkill = iwl_is_rfkill_set(trans);
+ if (hw_rfkill)
+ set_bit(STATUS_RFKILL, &trans->status);
+ else
+ clear_bit(STATUS_RFKILL, &trans->status);
+ iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+ if (hw_rfkill && !run_in_rfkill) {
+ ret = -ERFKILL;
+ goto out;
+ }
+
+ /* Someone called stop_device, don't try to start_fw */
+ if (trans_pcie->is_down) {
+ IWL_WARN(trans,
+ "Can't start_fw since the HW hasn't been started\n");
+ ret = -EIO;
+ goto out;
+ }
+
+ /* make sure rfkill handshake bits are cleared */
+ iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+ iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
+ CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
+
+ /* clear (again), then enable host interrupts */
+ iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+ ret = iwl_pcie_nic_init(trans);
+ if (ret) {
+ IWL_ERR(trans, "Unable to init nic\n");
+ goto out;
+ }
+
+ /*
+ * Now, we load the firmware and don't want to be interrupted, even
+ * by the RF-Kill interrupt (hence mask all the interrupts besides the
+ * FH_TX interrupt which is needed to load the firmware). If the
+ * RF-Kill switch is toggled, we will find out after having loaded
+ * the firmware and return the proper value to the caller.
+ */
+ iwl_enable_fw_load_int(trans);
+
+ /* really make sure rfkill handshake bits are cleared */
+ iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+ iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+
+ /* Load the given image to the HW */
+ if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
+ ret = iwl_pcie_load_given_ucode_8000(trans, fw);
+ else
+ ret = iwl_pcie_load_given_ucode(trans, fw);
+ iwl_enable_interrupts(trans);
+
+ /* re-check RF-Kill state since we may have missed the interrupt */
+ hw_rfkill = iwl_is_rfkill_set(trans);
+ if (hw_rfkill)
+ set_bit(STATUS_RFKILL, &trans->status);
+ else
+ clear_bit(STATUS_RFKILL, &trans->status);
+
+ iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+ if (hw_rfkill && !run_in_rfkill)
+ ret = -ERFKILL;
+
+ out:
+ mutex_unlock(&trans_pcie->mutex);
+ return ret;
+ }
+
+ static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
+ {
+ iwl_pcie_reset_ict(trans);
+ iwl_pcie_tx_start(trans, scd_addr);
+ }
+
static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@@ -1219,12 -1248,11 +1249,12 @@@ void iwl_trans_pcie_rf_kill(struct iwl_
_iwl_trans_pcie_stop_device(trans, true);
}
-static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test)
+static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test,
+ bool reset)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) {
+ if (!reset) {
/* Enable persistence mode to avoid reset */
iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG,
CSR_HW_IF_CONFIG_REG_PERSIST_MODE);
@@@ -1248,7 -1276,7 +1278,7 @@@
iwl_clear_bit(trans, CSR_GP_CNTRL,
CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
- if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D3) {
+ if (reset) {
/*
* reset TX queues -- some of their registers reset during S3
* so if we don't reset everything here the D3 image would try
@@@ -1262,7 -1290,7 +1292,7 @@@
static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans,
enum iwl_d3_status *status,
- bool test)
+ bool test, bool reset)
{
u32 val;
int ret;
@@@ -1297,7 -1325,7 +1327,7 @@@
iwl_pcie_set_pwr(trans, false);
- if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) {
+ if (!reset) {
iwl_clear_bit(trans, CSR_GP_CNTRL,
CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
} else {
@@@ -1355,10 -1383,6 +1385,10 @@@ static int _iwl_trans_pcie_start_hw(str
/* ... rfkill can call stop_device and set it false if needed */
iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+ /* Make sure we sync here, because we'll need full access later */
+ if (low_power)
+ pm_runtime_resume(trans->dev);
+
return 0;
}
@@@ -1428,6 -1452,12 +1458,6 @@@ static void iwl_trans_pcie_write_prph(s
iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WDAT, val);
}
-static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget)
-{
- WARN_ON(1);
- return 0;
-}
-
static void iwl_trans_pcie_configure(struct iwl_trans *trans,
const struct iwl_trans_config *trans_cfg)
{
@@@ -1464,8 -1494,11 +1494,8 @@@
* As this function may be called again in some corner cases don't
* do anything if NAPI was already initialized.
*/
- if (!trans_pcie->napi.poll) {
+ if (trans_pcie->napi_dev.reg_state != NETREG_DUMMY)
init_dummy_netdev(&trans_pcie->napi_dev);
- netif_napi_add(&trans_pcie->napi_dev, &trans_pcie->napi,
- iwl_pcie_dummy_napi_poll, 64);
- }
}
void iwl_trans_pcie_free(struct iwl_trans *trans)
@@@ -1473,9 -1506,6 +1503,9 @@@
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int i;
+ /* TODO: check if this is really needed */
+ pm_runtime_disable(trans->dev);
+
synchronize_irq(trans_pcie->pci_dev->irq);
iwl_pcie_tx_free(trans);
@@@ -1489,6 -1519,9 +1519,6 @@@
pci_release_regions(trans_pcie->pci_dev);
pci_disable_device(trans_pcie->pci_dev);
- if (trans_pcie->napi.poll)
- netif_napi_del(&trans_pcie->napi);
-
iwl_pcie_free_fw_monitor(trans);
for_each_possible_cpu(i) {
@@@ -1828,7 -1861,6 +1858,7 @@@ void iwl_trans_pcie_ref(struct iwl_tran
spin_lock_irqsave(&trans_pcie->ref_lock, flags);
IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count);
trans_pcie->ref_count++;
+ pm_runtime_get(&trans_pcie->pci_dev->dev);
spin_unlock_irqrestore(&trans_pcie->ref_lock, flags);
}
@@@ -1847,10 -1879,6 +1877,10 @@@ void iwl_trans_pcie_unref(struct iwl_tr
return;
}
trans_pcie->ref_count--;
+
+ pm_runtime_mark_last_busy(&trans_pcie->pci_dev->dev);
+ pm_runtime_put_autosuspend(&trans_pcie->pci_dev->dev);
+
spin_unlock_irqrestore(&trans_pcie->ref_lock, flags);
}
@@@ -2003,48 -2031,29 +2033,48 @@@ static ssize_t iwl_dbgfs_rx_queue_read(
{
struct iwl_trans *trans = file->private_data;
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- struct iwl_rxq *rxq = &trans_pcie->rxq;
- char buf[256];
- int pos = 0;
- const size_t bufsz = sizeof(buf);
-
- pos += scnprintf(buf + pos, bufsz - pos, "read: %u\n",
- rxq->read);
- pos += scnprintf(buf + pos, bufsz - pos, "write: %u\n",
- rxq->write);
- pos += scnprintf(buf + pos, bufsz - pos, "write_actual: %u\n",
- rxq->write_actual);
- pos += scnprintf(buf + pos, bufsz - pos, "need_update: %d\n",
- rxq->need_update);
- pos += scnprintf(buf + pos, bufsz - pos, "free_count: %u\n",
- rxq->free_count);
- if (rxq->rb_stts) {
- pos += scnprintf(buf + pos, bufsz - pos, "closed_rb_num: %u\n",
- le16_to_cpu(rxq->rb_stts->closed_rb_num) & 0x0FFF);
- } else {
- pos += scnprintf(buf + pos, bufsz - pos,
- "closed_rb_num: Not Allocated\n");
+ char *buf;
+ int pos = 0, i, ret;
+ size_t bufsz;
+
+ bufsz = sizeof(char) * 121 * trans->num_rx_queues;
+
+ if (!trans_pcie->rxq)
+ return -EAGAIN;
+
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < trans->num_rx_queues && pos < bufsz; i++) {
+ struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+ pos += scnprintf(buf + pos, bufsz - pos, "queue#: %2d\n",
+ i);
+ pos += scnprintf(buf + pos, bufsz - pos, "\tread: %u\n",
+ rxq->read);
+ pos += scnprintf(buf + pos, bufsz - pos, "\twrite: %u\n",
+ rxq->write);
+ pos += scnprintf(buf + pos, bufsz - pos, "\twrite_actual: %u\n",
+ rxq->write_actual);
+ pos += scnprintf(buf + pos, bufsz - pos, "\tneed_update: %2d\n",
+ rxq->need_update);
+ pos += scnprintf(buf + pos, bufsz - pos, "\tfree_count: %u\n",
+ rxq->free_count);
+ if (rxq->rb_stts) {
+ pos += scnprintf(buf + pos, bufsz - pos,
+ "\tclosed_rb_num: %u\n",
+ le16_to_cpu(rxq->rb_stts->closed_rb_num) &
+ 0x0FFF);
+ } else {
+ pos += scnprintf(buf + pos, bufsz - pos,
+ "\tclosed_rb_num: Not Allocated\n");
+ }
}
- return simple_read_from_buffer(user_buf, count, ppos, buf, pos);
+ ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos);
+ kfree(buf);
+
+ return ret;
}
static ssize_t iwl_dbgfs_interrupt_read(struct file *file,
@@@ -2209,8 -2218,7 +2239,8 @@@ static u32 iwl_trans_pcie_dump_rbs(stru
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int max_len = PAGE_SIZE << trans_pcie->rx_page_order;
- struct iwl_rxq *rxq = &trans_pcie->rxq;
+ /* Dumping RBs is supported only for pre-9000 devices (1 queue) */
+ struct iwl_rxq *rxq = &trans_pcie->rxq[0];
u32 i, r, j, rb_len = 0;
spin_lock(&rxq->lock);
@@@ -2405,8 -2413,7 +2435,8 @@@ static struct iwl_trans_dump_dat
u32 len, num_rbs;
u32 monitor_len;
int i, ptr;
- bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status);
+ bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status) &&
+ !trans->cfg->mq_rx_supported;
/* transport dump header */
len = sizeof(*dump_data);
@@@ -2461,12 -2468,11 +2491,12 @@@
len += sizeof(*data) + (FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND);
if (dump_rbs) {
+ /* Dumping RBs is supported only for pre-9000 devices (1 queue) */
+ struct iwl_rxq *rxq = &trans_pcie->rxq[0];
/* RBs */
- num_rbs = le16_to_cpu(ACCESS_ONCE(
- trans_pcie->rxq.rb_stts->closed_rb_num))
+ num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num))
& 0x0FFF;
- num_rbs = (num_rbs - trans_pcie->rxq.read) & RX_QUEUE_MASK;
+ num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK;
len += num_rbs * (sizeof(*data) +
sizeof(struct iwl_fw_error_dump_rb) +
(PAGE_SIZE << trans_pcie->rx_page_order));
@@@ -2517,22 -2523,6 +2547,22 @@@
return dump_data;
}
+#ifdef CONFIG_PM_SLEEP
+static int iwl_trans_pcie_suspend(struct iwl_trans *trans)
+{
+ if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3)
+ return iwl_pci_fw_enter_d0i3(trans);
+
+ return 0;
+}
+
+static void iwl_trans_pcie_resume(struct iwl_trans *trans)
+{
+ if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3)
+ iwl_pci_fw_exit_d0i3(trans);
+}
+#endif /* CONFIG_PM_SLEEP */
+
static const struct iwl_trans_ops trans_ops_pcie = {
.start_hw = iwl_trans_pcie_start_hw,
.op_mode_leave = iwl_trans_pcie_op_mode_leave,
@@@ -2543,11 -2533,6 +2573,11 @@@
.d3_suspend = iwl_trans_pcie_d3_suspend,
.d3_resume = iwl_trans_pcie_d3_resume,
+#ifdef CONFIG_PM_SLEEP
+ .suspend = iwl_trans_pcie_suspend,
+ .resume = iwl_trans_pcie_resume,
+#endif /* CONFIG_PM_SLEEP */
+
.send_cmd = iwl_trans_pcie_send_hcmd,
.tx = iwl_trans_pcie_tx,
@@@ -2586,7 -2571,7 +2616,7 @@@ struct iwl_trans *iwl_trans_pcie_alloc(
struct iwl_trans_pcie *trans_pcie;
struct iwl_trans *trans;
u16 pci_cmd;
- int ret;
+ int ret, addr_size;
trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie),
&pdev->dev, cfg, &trans_ops_pcie, 0);
@@@ -2624,17 -2609,11 +2654,17 @@@
PCIE_LINK_STATE_CLKPM);
}
+ if (cfg->mq_rx_supported)
+ addr_size = 64;
+ else
+ addr_size = 36;
+
pci_set_master(pdev);
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size));
if (!ret)
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36));
+ ret = pci_set_consistent_dma_mask(pdev,
+ DMA_BIT_MASK(addr_size));
if (ret) {
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (!ret)
@@@ -2737,8 -2716,6 +2767,8 @@@
/* Initialize the wait queue for commands */
init_waitqueue_head(&trans_pcie->wait_command_queue);
+ init_waitqueue_head(&trans_pcie->d0i3_waitq);
+
ret = iwl_pcie_alloc_ict(trans);
if (ret)
goto out_pci_disable_msi;
@@@ -2753,12 -2730,6 +2783,12 @@@
trans_pcie->inta_mask = CSR_INI_SET_MASK;
+#ifdef CONFIG_IWLWIFI_PCIE_RTPM
+ trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3;
+#else
+ trans->runtime_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
+#endif /* CONFIG_IWLWIFI_PCIE_RTPM */
+
return trans;
out_free_ict:
diff --combined include/linux/netdevice.h
index 47671ce0,5440b7b..e52077f
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@@ -51,7 -51,6 +51,7 @@@
#include <linux/neighbour.h>
#include <uapi/linux/netdevice.h>
#include <uapi/linux/if_bonding.h>
+#include <uapi/linux/pkt_cls.h>
struct netpoll_info;
struct device;
@@@ -779,25 -778,6 +779,25 @@@ static inline bool netdev_phys_item_id_
typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb);
+/* These structures hold the attributes of qdiscs and classifiers
+ * that are passed to the netdevice through the setup_tc op.
+ */
+enum {
+ TC_SETUP_MQPRIO,
+ TC_SETUP_CLSU32,
+};
+
+struct tc_cls_u32_offload;
+
+struct tc_to_netdev {
+ unsigned int type;
+ union {
+ u8 tc;
+ struct tc_cls_u32_offload *cls_u32;
+ };
+};
+
+
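A hedged sketch of how a driver might consume tc_to_netdev under the reworked ndo_setup_tc signature added further down in this hunk; the function name is made up and only the mqprio case is shown:

static int foo_setup_tc(struct net_device *dev, u32 handle,
			__be16 proto, struct tc_to_netdev *tc)
{
	/* dispatch on the union discriminator before touching members */
	if (tc->type != TC_SETUP_MQPRIO)
		return -EINVAL;

	return netdev_set_num_tc(dev, tc->tc);
}
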
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@@ -1170,10 -1150,7 +1170,10 @@@ struct net_device_ops
int (*ndo_set_vf_rss_query_en)(
struct net_device *dev,
int vf, bool setting);
- int (*ndo_setup_tc)(struct net_device *dev, u8 tc);
+ int (*ndo_setup_tc)(struct net_device *dev,
+ u32 handle,
+ __be16 protocol,
+ struct tc_to_netdev *tc);
#if IS_ENABLED(CONFIG_FCOE)
int (*ndo_fcoe_enable)(struct net_device *dev);
int (*ndo_fcoe_disable)(struct net_device *dev);
@@@ -1314,7 -1291,6 +1314,7 @@@
* @IFF_OPENVSWITCH: device is a Open vSwitch master
* @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
* @IFF_TEAM: device is a team device
+ * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@@ -1342,7 -1318,6 +1342,7 @@@
IFF_OPENVSWITCH = 1<<22,
IFF_L3MDEV_SLAVE = 1<<23,
IFF_TEAM = 1<<24,
+ IFF_RXFH_CONFIGURED = 1<<25,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@@ -1370,7 -1345,6 +1370,7 @@@
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
#define IFF_TEAM IFF_TEAM
+#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
/**
* struct net_device - The DEVICE structure.
@@@ -1423,8 -1397,6 +1423,8 @@@
* do not use this in drivers
* @tx_dropped: Dropped packets by core network,
* do not use this in drivers
+ * @rx_nohandler: nohandler dropped packets by core network on
+ * inactive devices, do not use this in drivers
*
* @wireless_handlers: List of functions to handle Wireless Extensions,
* instead of ioctl,
@@@ -1639,7 -1611,6 +1639,7 @@@ struct net_device
atomic_long_t rx_dropped;
atomic_long_t tx_dropped;
+ atomic_long_t rx_nohandler;
#ifdef CONFIG_WIRELESS_EXT
const struct iw_handler_def * wireless_handlers;
@@@ -3747,7 -3718,7 +3747,7 @@@ void *netdev_lower_get_next_private_rcu
void *netdev_lower_get_next(struct net_device *dev,
struct list_head **iter);
#define netdev_for_each_lower_dev(dev, ldev, iter) \
- for (iter = &(dev)->adj_list.lower, \
+ for (iter = (dev)->adj_list.lower.next, \
ldev = netdev_lower_get_next(dev, &(iter)); \
ldev; \
ldev = netdev_lower_get_next(dev, &(iter)))
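
For illustration, typical usage of the iterator fixed above; starting iter at adj_list.lower.next (rather than at the list head itself) makes the walk visit each lower device exactly once. Names are placeholders:

/* lower and iter are caller-provided cursors; dev is the upper device */
struct net_device *lower;
struct list_head *iter;

netdev_for_each_lower_dev(dev, lower, iter)
	netdev_info(lower, "enslaved below %s\n", dev->name);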
@@@ -3770,7 -3741,7 +3770,7 @@@ void netdev_lower_state_changed(struct
/* RSS keys are 40 or 52 bytes long */
#define NETDEV_RSS_KEY_LEN 52
-extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN];
+extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
void netdev_rss_key_fill(void *buffer, size_t len);
int dev_get_nest_level(struct net_device *dev,
@@@ -4074,11 -4045,6 +4074,11 @@@ static inline bool netif_is_lag_port(co
return netif_is_bond_slave(dev) || netif_is_team_port(dev);
}
+static inline bool netif_is_rxfh_configured(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_RXFH_CONFIGURED;
+}
+
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev)
{
diff --combined include/linux/skbuff.h
index 6a57757,4ce9ff7..eab4f8f
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -299,6 -299,7 +299,7 @@@ struct sk_buff
#else
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
#endif
+ extern int sysctl_max_skb_frags;
typedef struct skb_frag_struct skb_frag_t;
@@@ -2161,11 -2162,6 +2162,11 @@@ static inline int skb_checksum_start_of
return skb->csum_start - skb_headroom(skb);
}
+static inline unsigned char *skb_checksum_start(const struct sk_buff *skb)
+{
+ return skb->head + skb->csum_start;
+}
+
static inline int skb_transport_offset(const struct sk_buff *skb)
{
return skb_transport_header(skb) - skb->data;
@@@ -2404,10 -2400,6 +2405,10 @@@ static inline struct sk_buff *napi_allo
{
return __napi_alloc_skb(napi, length, GFP_ATOMIC);
}
+void napi_consume_skb(struct sk_buff *skb, int budget);
+
+void __kfree_skb_flush(void);
+void __kfree_skb_defer(struct sk_buff *skb);
/**
* __dev_alloc_pages - allocate page for network Rx
@@@ -2630,13 -2622,6 +2631,13 @@@ static inline int skb_clone_writable(co
skb_headroom(skb) + len <= skb->hdr_len;
}
+static inline int skb_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
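
A minimal usage sketch for the new helper, assuming a caller about to rewrite the IP header of a possibly-cloned skb; the function is illustrative and the IP checksum fixup is omitted for brevity:

static int rewrite_tos(struct sk_buff *skb, u8 tos)
{
	struct iphdr *iph;

	/* expand the head only if the bytes we are about to touch are
	 * shared with a clone; nonzero means the allocation failed */
	if (skb_try_make_writable(skb, sizeof(*iph)))
		return -ENOMEM;

	iph = ip_hdr(skb);
	iph->tos = tos;	/* IP header checksum update omitted */
	return 0;
}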
+
static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
int cloned)
{
@@@ -3565,7 -3550,6 +3566,7 @@@ static inline struct sec_path *skb_sec_
struct skb_gso_cb {
int mac_offset;
int encap_level;
+ __wsum csum;
__u16 csum_start;
};
#define SKB_SGO_CB_OFFSET 32
@@@ -3592,16 -3576,6 +3593,16 @@@ static inline int gso_pskb_expand_head(
return 0;
}
+static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res)
+{
+ /* Do not update partial checksums if remote checksum is enabled. */
+ if (skb->remcsum_offload)
+ return;
+
+ SKB_GSO_CB(skb)->csum = res;
+ SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head;
+}
+
/* Compute the checksum for a gso segment. First compute the checksum value
* from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
* then add in skb->csum (checksum from csum_start to end of packet).
@@@ -3612,14 -3586,15 +3613,14 @@@
*/
static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
{
- int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) -
- skb_transport_offset(skb);
- __wsum partial;
+ unsigned char *csum_start = skb_transport_header(skb);
+ int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start;
+ __wsum partial = SKB_GSO_CB(skb)->csum;
- partial = csum_partial(skb_transport_header(skb), plen, skb->csum);
- skb->csum = res;
- SKB_GSO_CB(skb)->csum_start -= plen;
+ SKB_GSO_CB(skb)->csum = res;
+ SKB_GSO_CB(skb)->csum_start = csum_start - skb->head;
- return csum_fold(partial);
+ return csum_fold(csum_partial(csum_start, plen, partial));
}
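
The rework relies on csum_partial() being combinable across adjacent byte ranges: the stashed sum of [csum_start, end) is fed in as the seed while summing [transport header, csum_start). A toy standalone model of why that combine is valid; it mirrors the one's-complement arithmetic only and ignores the kernel's byte-order details:

#include <stdint.h>

/* Summing range A with range B's sum as the seed equals summing the
 * concatenation A||B, which is exactly what gso_make_checksum()
 * exploits above. */
static uint16_t ocsum(const uint8_t *p, int len, uint32_t seed)
{
	uint32_t s = seed;
	int i;

	for (i = 0; i + 1 < len; i += 2)
		s += (uint32_t)(p[i] << 8 | p[i + 1]);
	if (len & 1)
		s += (uint32_t)(p[len - 1] << 8);
	while (s >> 16)
		s = (s & 0xffff) + (s >> 16);
	return (uint16_t)s;
}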
static inline bool skb_is_gso(const struct sk_buff *skb)
@@@ -3709,30 -3684,5 +3710,30 @@@ static inline unsigned int skb_gso_netw
return hdr_len + skb_gso_transport_seglen(skb);
}
+/* Local Checksum Offload.
+ * Compute outer checksum based on the assumption that the
+ * inner checksum will be offloaded later.
+ * See Documentation/networking/checksum-offloads.txt for
+ * explanation of how this works.
+ * Fill in outer checksum adjustment (e.g. with sum of outer
+ * pseudo-header) before calling.
+ * Also ensure that inner checksum is in linear data area.
+ */
+static inline __wsum lco_csum(struct sk_buff *skb)
+{
+ unsigned char *csum_start = skb_checksum_start(skb);
+ unsigned char *l4_hdr = skb_transport_header(skb);
+ __wsum partial;
+
+ /* Start with complement of inner checksum adjustment */
+ partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
+ skb->csum_offset));
+
+ /* Add in checksum of our headers (incl. outer checksum
+ * adjustment filled in by caller) and return result.
+ */
+ return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
+}
+
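A hedged sketch of a caller, modeled on how a UDP tunnel transmit path might use lco_csum(); udp_v4_check() comes from <net/udp.h>, and names plus error handling here are illustrative:

static void outer_udp_csum_lco(struct sk_buff *skb, struct udphdr *uh,
			       __be32 saddr, __be32 daddr, int len)
{
	/* seed the outer checksum field with the complement of the
	 * outer pseudo-header sum, as the comment above requires */
	uh->check = ~udp_v4_check(len, saddr, daddr, 0);
	/* fold in everything from the outer L4 header up to the inner
	 * csum_start; the inner checksum is finished later by hardware */
	uh->check = csum_fold(lco_csum(skb));
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;
}
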
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --combined include/net/ip_tunnels.h
index 4dd6163,dda9abf..5f28b60
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@@ -13,7 -13,6 +13,7 @@@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/lwtunnel.h>
+#include <net/dst_cache.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@@ -58,9 -57,6 +58,9 @@@ struct ip_tunnel_key
struct ip_tunnel_info {
struct ip_tunnel_key key;
+#ifdef CONFIG_DST_CACHE
+ struct dst_cache dst_cache;
+#endif
u8 options_len;
u8 mode;
};
@@@ -89,6 -85,11 +89,6 @@@ struct ip_tunnel_prl_entry
struct rcu_head rcu_head;
};
-struct ip_tunnel_dst {
- struct dst_entry __rcu *dst;
- __be32 saddr;
-};
-
struct metadata_dst;
struct ip_tunnel {
@@@ -107,7 -108,7 +107,7 @@@
int tun_hlen; /* Precalculated header length */
int mlink;
- struct ip_tunnel_dst __percpu *dst_cache;
+ struct dst_cache dst_cache;
struct ip_tunnel_parm parms;
@@@ -229,6 -230,7 +229,7 @@@ void ip_tunnel_xmit(struct sk_buff *skb
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
u8 *protocol, struct flowi4 *fl4);
+ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
@@@ -246,6 -248,7 +247,6 @@@ int ip_tunnel_changelink(struct net_dev
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
void ip_tunnel_setup(struct net_device *dev, int net_id);
-void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
int ip_tunnel_encap_setup(struct ip_tunnel *t,
struct ip_tunnel_encap *ipencap);
@@@ -270,15 -273,15 +271,15 @@@ static inline u8 ip_tunnel_ecn_encap(u
return INET_ECN_encapsulate(tos, inner);
}
-int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
+ bool xnet);
void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, u8 proto,
u8 tos, u8 ttl, __be16 df, bool xnet);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags);
-struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
- int gso_type_mask);
+struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
{
diff --combined include/net/tcp.h
index 9b2cb0c,ae6468f..e90db85
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@@ -239,6 -239,13 +239,6 @@@ extern struct inet_timewait_death_row t
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
-extern int sysctl_tcp_fin_timeout;
-extern int sysctl_tcp_syn_retries;
-extern int sysctl_tcp_synack_retries;
-extern int sysctl_tcp_retries1;
-extern int sysctl_tcp_retries2;
-extern int sysctl_tcp_orphan_retries;
-extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
@@@ -267,6 -274,7 +267,6 @@@ extern int sysctl_tcp_thin_dupack
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
-extern unsigned int sysctl_tcp_notsent_lowat;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_min_rtt_wlen;
extern int sysctl_tcp_autocorking;
@@@ -439,7 -447,7 +439,7 @@@ const u8 *tcp_parse_md5sig_option(cons
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
- void tcp_req_err(struct sock *sk, u32 seq);
+ void tcp_req_err(struct sock *sk, u32 seq, bool abort);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
struct request_sock *req,
@@@ -560,7 -568,6 +560,7 @@@ void tcp_rearm_rto(struct sock *sk)
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
+void tcp_fin(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@@ -956,11 -963,9 +956,11 @@@ static inline void tcp_enable_fack(stru
*/
static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
{
+ struct net *net = sock_net((struct sock *)tp);
+
tp->do_early_retrans = sysctl_tcp_early_retrans &&
sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
- sysctl_tcp_reordering == 3;
+ net->ipv4.sysctl_tcp_reordering == 3;
}
static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
@@@ -1247,7 -1252,7 +1247,7 @@@ static inline u32 keepalive_time_elapse
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@@ -1432,7 -1437,6 +1432,7 @@@ void tcp_free_fastopen_req(struct tcp_s
extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
@@@ -1681,8 -1685,7 +1681,8 @@@ void __tcp_v4_send_check(struct sk_buf
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
- return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat;
+ struct net *net = sock_net((struct sock *)tp);
+ return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
}
static inline bool tcp_stream_memory_free(const struct sock *sk)
diff --combined kernel/bpf/verifier.c
index 36dc497,2e7f7ab..2e08f8e
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@@ -246,7 -246,6 +246,7 @@@ static const struct
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
+ {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
};
static void print_verifier_state(struct verifier_env *env)
@@@ -779,24 -778,15 +779,24 @@@ static int check_xadd(struct verifier_e
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized
*/
-static int check_stack_boundary(struct verifier_env *env,
- int regno, int access_size)
+static int check_stack_boundary(struct verifier_env *env, int regno,
+ int access_size, bool zero_size_allowed)
{
struct verifier_state *state = &env->cur_state;
struct reg_state *regs = state->regs;
int off, i;
- if (regs[regno].type != PTR_TO_STACK)
+ if (regs[regno].type != PTR_TO_STACK) {
+ if (zero_size_allowed && access_size == 0 &&
+ regs[regno].type == CONST_IMM &&
+ regs[regno].imm == 0)
+ return 0;
+
+ verbose("R%d type=%s expected=%s\n", regno,
+ reg_type_str[regs[regno].type],
+ reg_type_str[PTR_TO_STACK]);
return -EACCES;
+ }
off = regs[regno].imm;
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@@ -839,24 -829,15 +839,24 @@@ static int check_func_arg(struct verifi
return 0;
}
- if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
+ if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
- } else if (arg_type == ARG_CONST_STACK_SIZE) {
+ } else if (arg_type == ARG_CONST_STACK_SIZE ||
+ arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
expected_type = CONST_IMM;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
} else if (arg_type == ARG_PTR_TO_CTX) {
expected_type = PTR_TO_CTX;
+ } else if (arg_type == ARG_PTR_TO_STACK) {
+ expected_type = PTR_TO_STACK;
+ /* One exception here. In case the function allows NULL to be
+ * passed in as an argument, it's a CONST_IMM type. The final test
+ * happens during stack boundary checking.
+ */
+ if (reg->type == CONST_IMM && reg->imm == 0)
+ expected_type = CONST_IMM;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
@@@ -886,8 -867,8 +886,8 @@@
verbose("invalid map_ptr to access map->key\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno, (*mapp)->key_size);
-
+ err = check_stack_boundary(env, regno, (*mapp)->key_size,
+ false);
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
@@@ -897,12 -878,9 +897,12 @@@
verbose("invalid map_ptr to access map->value\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno, (*mapp)->value_size);
+ err = check_stack_boundary(env, regno, (*mapp)->value_size,
+ false);
+ } else if (arg_type == ARG_CONST_STACK_SIZE ||
+ arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
+ bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
- } else if (arg_type == ARG_CONST_STACK_SIZE) {
/* bpf_xxx(..., buf, len) call will access 'len' bytes
* from stack pointer 'buf'. Check it
* note: regno == len, regno - 1 == buf
@@@ -912,8 -890,7 +912,8 @@@
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno - 1, reg->imm);
+ err = check_stack_boundary(env, regno - 1, reg->imm,
+ zero_size_allowed);
}
return err;
@@@ -934,11 -911,8 +934,11 @@@ static int check_map_func_compatibility
* don't allow any other map type to be passed into
* the special func;
*/
- if (bool_func && bool_map != bool_func)
+ if (bool_func && bool_map != bool_func) {
+ verbose("cannot pass map_type %d into func %d\n",
+ map->map_type, func_id);
return -EINVAL;
+ }
}
return 0;
@@@ -2108,7 -2082,7 +2108,7 @@@ static void adjust_branches(struct bpf_
/* adjust offset of jmps if necessary */
if (i < pos && i + insn->off + 1 > pos)
insn->off += delta;
- else if (i > pos && i + insn->off + 1 < pos)
+ else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
insn->off -= delta;
}
}
diff --combined lib/Kconfig.debug
index f890ee5,8bfd1ac..60d09e9
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -1400,6 -1400,21 +1400,21 @@@ config RCU_EQS_DEBU
endmenu # "RCU Debugging"
+ config DEBUG_WQ_FORCE_RR_CPU
+ bool "Force round-robin CPU selection for unbound work items"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ The workqueue subsystem used to implicitly guarantee that work
+ items queued without an explicit CPU specified are put on the
+ local CPU. This guarantee no longer holds; while the local CPU
+ is still preferred, work items may be put on foreign CPUs. The
+ kernel parameter "workqueue.debug_force_rr_cpu" forces
+ round-robin CPU selection to flush out usages which depend on the
+ now broken guarantee. This config option enables the debug
+ feature by default. When enabled, memory and cache locality will
+ be impacted.
+
config DEBUG_BLOCK_EXT_DEVT
bool "Force extended block device numbers and spread them"
depends on DEBUG_KERNEL
@@@ -1738,14 -1753,6 +1753,14 @@@ config TEST_KSTRTO
config TEST_PRINTF
tristate "Test printf() family of functions at runtime"
+config TEST_BITMAP
+ tristate "Test bitmap_*() family of functions at runtime"
+ default n
+ help
+ Enable this option to test the bitmap functions at boot.
+
+ If unsure, say N.
+
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
default n
diff --combined net/batman-adv/gateway_client.c
index 261866e,ccf70be..4b598bd
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@@ -1,4 -1,4 +1,4 @@@
-/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@@ -28,7 -28,6 +28,7 @@@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
@@@ -60,29 -59,12 +60,29 @@@
*/
#define BATADV_DHCP_CHADDR_OFFSET 28
+/**
+ * batadv_gw_node_release - release gw_node from lists and queue for free after
+ * rcu grace period
+ * @ref: kref pointer of the gw_node
+ */
+static void batadv_gw_node_release(struct kref *ref)
+{
+ struct batadv_gw_node *gw_node;
+
+ gw_node = container_of(ref, struct batadv_gw_node, refcount);
+
+ batadv_orig_node_free_ref(gw_node->orig_node);
+ kfree_rcu(gw_node, rcu);
+}
+
+/**
+ * batadv_gw_node_free_ref - decrement the gw_node refcounter and possibly
+ * release it
+ * @gw_node: gateway node to free
+ */
static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node)
{
- if (atomic_dec_and_test(&gw_node->refcount)) {
- batadv_orig_node_free_ref(gw_node->orig_node);
- kfree_rcu(gw_node, rcu);
- }
+ kref_put(&gw_node->refcount, batadv_gw_node_release);
}
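
The conversion running through this hunk (atomic_t refcount to struct kref) boils down to a fixed correspondence; a compact sketch with made-up object names:

#include <linux/kref.h>
#include <linux/slab.h>

struct obj {
	struct kref refcount;
};

static void obj_release(struct kref *ref)
{
	struct obj *o = container_of(ref, struct obj, refcount);

	kfree(o);	/* or kfree_rcu() when RCU readers may still hold it */
}

static void obj_refcounting(struct obj *o)
{
	kref_init(&o->refcount);			/* was atomic_set(.., 1) */
	if (kref_get_unless_zero(&o->refcount))		/* was atomic_inc_not_zero() */
		kref_put(&o->refcount, obj_release);
	kref_put(&o->refcount, obj_release);		/* was atomic_dec_and_test() + free */
}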
static struct batadv_gw_node *
@@@ -95,7 -77,7 +95,7 @@@ batadv_gw_get_selected_gw_node(struct b
if (!gw_node)
goto out;
- if (!atomic_inc_not_zero(&gw_node->refcount))
+ if (!kref_get_unless_zero(&gw_node->refcount))
gw_node = NULL;
out:
@@@ -118,7 -100,7 +118,7 @@@ batadv_gw_get_selected_orig(struct bata
if (!orig_node)
goto unlock;
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!kref_get_unless_zero(&orig_node->refcount))
orig_node = NULL;
unlock:
@@@ -136,7 -118,7 +136,7 @@@ static void batadv_gw_select(struct bat
spin_lock_bh(&bat_priv->gw.list_lock);
- if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount))
+ if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount))
new_gw_node = NULL;
curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
@@@ -188,7 -170,7 +188,7 @@@ batadv_gw_get_best_gw_node(struct batad
if (!router_ifinfo)
goto next;
- if (!atomic_inc_not_zero(&gw_node->refcount))
+ if (!kref_get_unless_zero(&gw_node->refcount))
goto next;
tq_avg = router_ifinfo->bat_iv.tq_avg;
@@@ -206,7 -188,7 +206,7 @@@
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
curr_gw = gw_node;
- atomic_inc(&curr_gw->refcount);
+ kref_get(&curr_gw->refcount);
}
break;
@@@ -221,7 -203,7 +221,7 @@@
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
curr_gw = gw_node;
- atomic_inc(&curr_gw->refcount);
+ kref_get(&curr_gw->refcount);
}
break;
}
@@@ -441,7 -423,7 +441,7 @@@ static void batadv_gw_node_add(struct b
if (gateway->bandwidth_down == 0)
return;
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!kref_get_unless_zero(&orig_node->refcount))
return;
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
@@@ -454,7 -436,7 +454,7 @@@
gw_node->orig_node = orig_node;
gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
- atomic_set(&gw_node->refcount, 1);
+ kref_init(&gw_node->refcount);
spin_lock_bh(&bat_priv->gw.list_lock);
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
@@@ -474,7 -456,7 +474,7 @@@
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator announcing gateway capabilities
*
- * Returns gateway node if found or NULL otherwise.
+ * Return: gateway node if found or NULL otherwise.
*/
static struct batadv_gw_node *
batadv_gw_node_get(struct batadv_priv *bat_priv,
@@@ -487,7 -469,7 +487,7 @@@
if (gw_node_tmp->orig_node != orig_node)
continue;
- if (!atomic_inc_not_zero(&gw_node_tmp->refcount))
+ if (!kref_get_unless_zero(&gw_node_tmp->refcount))
continue;
gw_node = gw_node_tmp;
@@@ -545,11 -527,12 +545,12 @@@ void batadv_gw_node_update(struct batad
* gets dereferenced.
*/
spin_lock_bh(&bat_priv->gw.list_lock);
- hlist_del_init_rcu(&gw_node->list);
+ if (!hlist_unhashed(&gw_node->list)) {
+ hlist_del_init_rcu(&gw_node->list);
+ batadv_gw_node_free_ref(gw_node);
+ }
spin_unlock_bh(&bat_priv->gw.list_lock);
- batadv_gw_node_free_ref(gw_node);
-
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
@@@ -673,13 -656,13 +674,13 @@@ out
* @chaddr: buffer where the client address will be stored. Valid
* only if the function returns BATADV_DHCP_TO_CLIENT
*
- * Returns:
+ * This function may re-allocate the data buffer of the skb passed as argument.
+ *
+ * Return:
* - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error
* while parsing it
* - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server
* - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client
- *
- * This function may re-allocate the data buffer of the skb passed as argument.
*/
enum batadv_dhcp_recipient
batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
@@@ -794,11 -777,11 +795,11 @@@
* server. Due to topology changes it may be the case that the GW server
* previously selected is not the best one anymore.
*
- * Returns true if the packet destination is unicast and it is not the best gw,
- * false otherwise.
- *
* This call might reallocate skb data.
* Must be invoked only when the DHCP packet is going TO a DHCP SERVER.
+ *
+ * Return: true if the packet destination is unicast and it is not the best gw,
+ * false otherwise.
*/
bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
struct sk_buff *skb)
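
The gateway_client.c conversion above is the template for the whole series: the open-coded atomic_t reference counter becomes a struct kref with a dedicated release callback, so the teardown logic is written exactly once. Reduced to a sketch (struct and function names are illustrative, not from the patch):

    struct foo {
        struct kref refcount;
        struct rcu_head rcu;
    };

    /* invoked by kref_put() when the count drops to zero */
    static void foo_release(struct kref *ref)
    {
        struct foo *foo = container_of(ref, struct foo, refcount);

        kfree_rcu(foo, rcu);
    }

    static void foo_free_ref(struct foo *foo)
    {
        /* was: if (atomic_dec_and_test(&foo->refcount)) kfree_rcu(...) */
        kref_put(&foo->refcount, foo_release);
    }

The substitutions then follow from kref semantics: kref_init() starts the count at 1, so the old atomic_set(&x->refcount, 2) becomes kref_init() plus one kref_get(), and lockless RCU lookups trade atomic_inc_not_zero() for kref_get_unless_zero(), which fails the same way on an object already on its way to release.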
diff --combined net/batman-adv/hard-interface.c
index fb2d9c0,57f71071..e2aaa4c
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@@ -1,4 -1,4 +1,4 @@@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@@ -18,7 -18,6 +18,7 @@@
#include "hard-interface.h"
#include "main.h"
+#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
@@@ -27,7 -26,6 +27,7 @@@
#include <linux/if_ether.h>
#include <linux/if.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
@@@ -49,19 -47,13 +49,19 @@@
#include "sysfs.h"
#include "translation-table.h"
-void batadv_hardif_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_hardif_release - release hard interface from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the hard interface
+ */
+void batadv_hardif_release(struct kref *ref)
{
struct batadv_hard_iface *hard_iface;
- hard_iface = container_of(rcu, struct batadv_hard_iface, rcu);
+ hard_iface = container_of(ref, struct batadv_hard_iface, refcount);
dev_put(hard_iface->net_dev);
- kfree(hard_iface);
+
+ kfree_rcu(hard_iface, rcu);
}
struct batadv_hard_iface *
@@@ -72,7 -64,7 +72,7 @@@ batadv_hardif_get_by_netdev(const struc
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
if (hard_iface->net_dev == net_dev &&
- atomic_inc_not_zero(&hard_iface->refcount))
+ kref_get_unless_zero(&hard_iface->refcount))
goto out;
}
@@@ -84,6 -76,28 +84,28 @@@ out
}
/**
+ * batadv_mutual_parents - check if two devices are each others parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each others parent, otherwise false
+ */
+ static bool batadv_mutual_parents(const struct net_device *dev1,
+ const struct net_device *dev2)
+ {
+ int dev1_parent_iflink = dev_get_iflink(dev1);
+ int dev2_parent_iflink = dev_get_iflink(dev2);
+
+ if (!dev1_parent_iflink || !dev2_parent_iflink)
+ return false;
+
+ return (dev1_parent_iflink == dev2->ifindex) &&
+ (dev2_parent_iflink == dev1->ifindex);
+ }
+
+ /**
* batadv_is_on_batman_iface - check if a device is a batman iface descendant
* @net_dev: the device to check
*
@@@ -93,7 -107,7 +115,7 @@@
* This function recursively checks all the fathers of the device passed as
* argument looking for a batman-adv soft interface.
*
- * Returns true if the device is descendant of a batman-adv mesh interface (or
+ * Return: true if the device is descendant of a batman-adv mesh interface (or
* if it is a batman-adv interface itself), false otherwise
*/
static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
@@@ -116,6 -130,9 +138,9 @@@
if (WARN(!parent_dev, "Cannot find parent device"))
return false;
+ if (batadv_mutual_parents(net_dev, parent_dev))
+ return false;
+
ret = batadv_is_on_batman_iface(parent_dev);
return ret;
@@@ -144,7 -161,7 +169,7 @@@ static int batadv_is_valid_iface(const
* interface
* @net_device: the device to check
*
- * Returns true if the net device is a 802.11 wireless device, false otherwise.
+ * Return: true if the net device is a 802.11 wireless device, false otherwise.
*/
bool batadv_is_wifi_netdev(struct net_device *net_device)
{
@@@ -177,7 -194,7 +202,7 @@@ batadv_hardif_get_active(const struct n
continue;
if (hard_iface->if_status == BATADV_IF_ACTIVE &&
- atomic_inc_not_zero(&hard_iface->refcount))
+ kref_get_unless_zero(&hard_iface->refcount))
goto out;
}
@@@ -211,7 -228,7 +236,7 @@@ static void batadv_primary_if_select(st
ASSERT_RTNL();
- if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount))
+ if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount))
new_hard_iface = NULL;
curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
@@@ -409,8 -426,7 +434,8 @@@ batadv_hardif_deactivate_interface(stru
*
* Invoke ndo_del_slave on master passing slave as argument. In this way slave
* is free'd and master can correctly change its internal state.
- * Return 0 on success, a negative value representing the error otherwise
+ *
+ * Return: 0 on success, a negative value representing the error otherwise
*/
static int batadv_master_del_slave(struct batadv_hard_iface *slave,
struct net_device *master)
@@@ -439,7 -455,7 +464,7 @@@ int batadv_hardif_enable_interface(stru
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount))
+ if (!kref_get_unless_zero(&hard_iface->refcount))
goto out;
soft_iface = dev_get_by_name(&init_net, iface_name);
@@@ -660,8 -676,7 +685,8 @@@ batadv_hardif_add_interface(struct net_
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
/* extra reference for return */
- atomic_set(&hard_iface->refcount, 2);
+ kref_init(&hard_iface->refcount);
+ kref_get(&hard_iface->refcount);
batadv_check_known_mac_addr(hard_iface->net_dev);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
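
The new mutual-parents check matters because batadv_is_on_batman_iface() recurses along dev_get_iflink(); on a veth pair that walk never terminates, as each peer names the other as its parent. The relationship is easy to confirm from userspace through sysfs; a small hypothetical demo, assuming a pair created with `ip link add veth0 type veth peer name veth1`:

    #include <stdio.h>

    static int read_int(const char *path)
    {
        FILE *f = fopen(path, "r");
        int v = -1;

        if (f) {
            if (fscanf(f, "%d", &v) != 1)
                v = -1;
            fclose(f);
        }
        return v;
    }

    int main(void)
    {
        int idx0 = read_int("/sys/class/net/veth0/ifindex");
        int idx1 = read_int("/sys/class/net/veth1/ifindex");
        int lnk0 = read_int("/sys/class/net/veth0/iflink");
        int lnk1 = read_int("/sys/class/net/veth1/iflink");

        /* same condition batadv_mutual_parents() evaluates in-kernel */
        printf("mutual parents: %s\n",
               (lnk0 == idx1 && lnk1 == idx0) ? "yes" : "no");
        return 0;
    }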
diff --combined net/batman-adv/translation-table.c
index 1188279,0e80fd1..5c7fa02
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@@ -1,4 -1,4 +1,4 @@@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
@@@ -31,7 -31,6 +31,7 @@@
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@@ -69,15 -68,7 +69,15 @@@ static void batadv_tt_global_del(struc
unsigned short vid, const char *message,
bool roaming);
-/* returns 1 if they are the same mac addr and vid */
+/**
+ * batadv_compare_tt - check if two TT entries are the same
+ * @node: the list element pointer of the first TT entry
+ * @data2: pointer to the tt_common_entry of the second TT entry
+ *
+ * Compare the MAC address and the VLAN ID of the two TT entries and check if
+ * they are the same TT client.
+ * Return: 1 if the two TT clients are the same, 0 otherwise
+ */
static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_tt_common_entry,
@@@ -93,7 -84,7 +93,7 @@@
* @data: pointer to the tt_common_entry object to map
* @size: the size of the hash table
*
- * Returns the hash index where the object represented by 'data' should be
+ * Return: the hash index where the object represented by 'data' should be
* stored at.
*/
static inline u32 batadv_choose_tt(const void *data, u32 size)
@@@ -114,7 -105,7 +114,7 @@@
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
*
- * Returns a pointer to the tt_common struct belonging to the searched client if
+ * Return: a pointer to the tt_common struct belonging to the searched client if
* found, NULL otherwise.
*/
static struct batadv_tt_common_entry *
@@@ -142,7 -133,7 +142,7 @@@ batadv_tt_hash_find(struct batadv_hasht
if (tt->vid != vid)
continue;
- if (!atomic_inc_not_zero(&tt->refcount))
+ if (!kref_get_unless_zero(&tt->refcount))
continue;
tt_tmp = tt;
@@@ -159,7 -150,7 +159,7 @@@
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
*
- * Returns a pointer to the corresponding tt_local_entry struct if the client is
+ * Return: a pointer to the corresponding tt_local_entry struct if the client is
* found, NULL otherwise.
*/
static struct batadv_tt_local_entry *
@@@ -184,7 -175,7 +184,7 @@@ batadv_tt_local_hash_find(struct batadv
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
*
- * Returns a pointer to the corresponding tt_global_entry struct if the client
+ * Return: a pointer to the corresponding tt_global_entry struct if the client
* is found, NULL otherwise.
*/
static struct batadv_tt_global_entry *
@@@ -203,68 -194,34 +203,68 @@@ batadv_tt_global_hash_find(struct batad
return tt_global_entry;
}
+/**
+ * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
+ * for free after rcu grace period
+ * @ref: kref pointer of the tt_local_entry
+ */
+static void batadv_tt_local_entry_release(struct kref *ref)
+{
+ struct batadv_tt_local_entry *tt_local_entry;
+
+ tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
+ common.refcount);
+
+ kfree_rcu(tt_local_entry, common.rcu);
+}
+
+/**
+ * batadv_tt_local_entry_free_ref - decrement the tt_local_entry refcounter and
+ * possibly release it
+ * @tt_local_entry: tt_local_entry to be free'd
+ */
static void
batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry)
{
- if (atomic_dec_and_test(&tt_local_entry->common.refcount))
- kfree_rcu(tt_local_entry, common.rcu);
+ kref_put(&tt_local_entry->common.refcount,
+ batadv_tt_local_entry_release);
}
/**
- * batadv_tt_global_entry_free_ref - decrement the refcounter for a
- * tt_global_entry and possibly free it
- * @tt_global_entry: the object to free
+ * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
+ * for free after rcu grace period
+ * @ref: kref pointer of the tt_global_entry
+ */
+static void batadv_tt_global_entry_release(struct kref *ref)
+{
+ struct batadv_tt_global_entry *tt_global_entry;
+
+ tt_global_entry = container_of(ref, struct batadv_tt_global_entry,
+ common.refcount);
+
+ batadv_tt_global_del_orig_list(tt_global_entry);
+ kfree_rcu(tt_global_entry, common.rcu);
+}
+
+/**
+ * batadv_tt_global_entry_free_ref - decrement the tt_global_entry refcounter
+ * and possibly release it
+ * @tt_global_entry: tt_global_entry to be free'd
*/
static void
batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry)
{
- if (atomic_dec_and_test(&tt_global_entry->common.refcount)) {
- batadv_tt_global_del_orig_list(tt_global_entry);
- kfree_rcu(tt_global_entry, common.rcu);
- }
+ kref_put(&tt_global_entry->common.refcount,
+ batadv_tt_global_entry_release);
}
/**
* batadv_tt_global_hash_count - count the number of orig entries
- * @hash: hash table containing the tt entries
+ * @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to count entries for
* @vid: VLAN identifier
*
- * Return the number of originators advertising the given address/data
+ * Return: the number of originators advertising the given address/data
* (excluding ourself).
*/
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
@@@ -329,9 -286,9 +329,9 @@@ static void batadv_tt_local_size_dec(st
}
/**
- * batadv_tt_global_size_mod - change the size by v of the local table
- * identified by vid
- * @bat_priv: the bat priv with all the soft interface information
+ * batadv_tt_global_size_mod - change the size by v of the global table
+ * for orig_node identified by vid
+ * @orig_node: the originator for which the table has to be modified
* @vid: the VLAN identifier
* @v: the amount to sum to the global table size
*/
@@@ -346,9 -303,11 +346,11 @@@ static void batadv_tt_global_size_mod(s
if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
spin_lock_bh(&orig_node->vlan_list_lock);
- hlist_del_init_rcu(&vlan->list);
+ if (!hlist_unhashed(&vlan->list)) {
+ hlist_del_init_rcu(&vlan->list);
+ batadv_orig_node_vlan_free_ref(vlan);
+ }
spin_unlock_bh(&orig_node->vlan_list_lock);
- batadv_orig_node_vlan_free_ref(vlan);
}
batadv_orig_node_vlan_free_ref(vlan);
@@@ -381,28 -340,22 +383,28 @@@ static void batadv_tt_global_size_dec(s
/**
* batadv_tt_orig_list_entry_release - release tt orig entry from lists and
* queue for free after rcu grace period
- * @orig_entry: tt orig entry to be free'd
+ * @ref: kref pointer of the tt orig entry
*/
-static void
-batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
+static void batadv_tt_orig_list_entry_release(struct kref *ref)
{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ orig_entry = container_of(ref, struct batadv_tt_orig_list_entry,
+ refcount);
+
batadv_orig_node_free_ref(orig_entry->orig_node);
kfree_rcu(orig_entry, rcu);
}
+/**
+ * batadv_tt_orig_list_entry_free_ref - decrement the tt orig entry refcounter
+ * and possibly release it
+ * @orig_entry: tt orig entry to be free'd
+ */
static void
batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
{
- if (!atomic_dec_and_test(&orig_entry->refcount))
- return;
-
- batadv_tt_orig_list_entry_release(orig_entry);
+ kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
}
/**
@@@ -484,7 -437,7 +486,7 @@@ unlock
* batadv_tt_len - compute length in bytes of given number of tt changes
* @changes_num: number of tt changes
*
- * Returns computed length in bytes.
+ * Return: computed length in bytes.
*/
static int batadv_tt_len(int changes_num)
{
@@@ -495,7 -448,7 +497,7 @@@
* batadv_tt_entries - compute the number of entries fitting in tt_len bytes
* @tt_len: available space
*
- * Returns the number of entries.
+ * Return: the number of entries.
*/
static u16 batadv_tt_entries(u16 tt_len)
{
@@@ -507,7 -460,7 +509,7 @@@
* size when transmitted over the air
* @bat_priv: the bat priv with all the soft interface information
*
- * Returns local translation table size in bytes.
+ * Return: local translation table size in bytes.
*/
static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
{
@@@ -573,7 -526,7 +575,7 @@@ static void batadv_tt_global_free(struc
* @mark: the value contained in the skb->mark field of the received packet (if
* any)
*
- * Returns true if the client was successfully added, false otherwise.
+ * Return: true if the client was successfully added, false otherwise.
*/
bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
unsigned short vid, int ifindex, u32 mark)
@@@ -667,8 -620,7 +669,8 @@@
tt_local->common.vid = vid;
if (batadv_is_wifi_netdev(in_dev))
tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
- atomic_set(&tt_local->common.refcount, 2);
+ kref_init(&tt_local->common.refcount);
+ kref_get(&tt_local->common.refcount);
tt_local->last_seen = jiffies;
tt_local->common.added_at = tt_local->last_seen;
@@@ -769,11 -721,12 +771,11 @@@ out
* function reserves the amount of space needed to send the entire global TT
* table. In case of success the value is updated with the real amount of
* reserved bytes
-
* Allocate the needed amount of memory for the entire TT TVLV and write its
* header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
* objects, one per active VLAN served by the originator node.
*
- * Return the size of the allocated buffer or 0 in case of failure.
+ * Return: the size of the allocated buffer or 0 in case of failure.
*/
static u16
batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
@@@ -847,7 -800,7 +849,7 @@@ out
* header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
* objects, one per active VLAN.
*
- * Return the size of the allocated buffer or 0 in case of failure.
+ * Return: the size of the allocated buffer or 0 in case of failure.
*/
static u16
batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
@@@ -1089,7 -1042,7 +1091,7 @@@ batadv_tt_local_set_pending(struct bata
* @message: message to append to the log on deletion
* @roaming: true if the deletion is due to a roaming event
*
- * Returns the flags assigned to the local entry before being deleted
+ * Return: the flags assigned to the local entry before being deleted
*/
u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid, const char *message,
@@@ -1289,16 -1242,10 +1291,16 @@@ static void batadv_tt_changes_list_free
spin_unlock_bh(&bat_priv->tt.changes_list_lock);
}
-/* retrieves the orig_tt_list_entry belonging to orig_node from the
+/**
+ * batadv_tt_global_orig_entry_find - find a TT orig_list_entry
+ * @entry: the TT global entry where the orig_list_entry has to be
+ * extracted from
+ * @orig_node: the originator for which the orig_list_entry has to be found
+ *
+ * retrieve the orig_tt_list_entry belonging to orig_node from the
* batadv_tt_global_entry list
*
- * returns it with an increased refcounter, NULL if not found
+ * Return: it with an increased refcounter, NULL if not found
*/
static struct batadv_tt_orig_list_entry *
batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
@@@ -1312,7 -1259,7 +1314,7 @@@
hlist_for_each_entry_rcu(tmp_orig_entry, head, list) {
if (tmp_orig_entry->orig_node != orig_node)
continue;
- if (!atomic_inc_not_zero(&tmp_orig_entry->refcount))
+ if (!kref_get_unless_zero(&tmp_orig_entry->refcount))
continue;
orig_entry = tmp_orig_entry;
@@@ -1323,15 -1270,8 +1325,15 @@@
return orig_entry;
}
-/* find out if an orig_node is already in the list of a tt_global_entry.
- * returns true if found, false otherwise
+/**
+ * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled
+ * by a given originator
+ * @entry: the TT global entry to check
+ * @orig_node: the originator to search in the list
+ *
+ * find out if an orig_node is already in the list of a tt_global_entry.
+ *
+ * Return: true if found, false otherwise
*/
static bool
batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
@@@ -1369,12 -1309,11 +1371,12 @@@ batadv_tt_global_orig_entry_add(struct
goto out;
INIT_HLIST_NODE(&orig_entry->list);
- atomic_inc(&orig_node->refcount);
+ kref_get(&orig_node->refcount);
batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
- atomic_set(&orig_entry->refcount, 2);
+ kref_init(&orig_entry->refcount);
+ kref_get(&orig_entry->refcount);
spin_lock_bh(&tt_global->list_lock);
hlist_add_head_rcu(&orig_entry->list,
@@@ -1404,7 -1343,7 +1406,7 @@@ out
*
* The caller must hold orig_node refcount.
*
- * Return true if the new entry has been added, false otherwise
+ * Return: true if the new entry has been added, false otherwise
*/
static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@@ -1450,8 -1389,7 +1452,8 @@@
*/
if (flags & BATADV_TT_CLIENT_ROAM)
tt_global_entry->roam_at = jiffies;
- atomic_set(&common->refcount, 2);
+ kref_init(&common->refcount);
+ kref_get(&common->refcount);
common->added_at = jiffies;
INIT_HLIST_HEAD(&tt_global_entry->orig_list);
@@@ -1563,7 -1501,7 +1565,7 @@@ out
* @tt_global_entry: global translation table entry to be analyzed
*
 * This function assumes the caller holds rcu_read_lock().
- * Returns best originator list entry or NULL on errors.
+ * Return: best originator list entry or NULL on errors.
*/
static struct batadv_tt_orig_list_entry *
batadv_transtable_best_orig(struct batadv_priv *bat_priv,
@@@ -2093,7 -2031,7 +2095,7 @@@ _batadv_is_ap_isolated(struct batadv_tt
* @addr: mac address of the destination client
* @vid: VLAN identifier
*
- * Returns a pointer to the originator that was selected as destination in the
+ * Return: a pointer to the originator that was selected as destination in the
* mesh for contacting the client 'addr', NULL otherwise.
* In case of multiple originators serving the same client, the function returns
* the best one (best in terms of metric towards the destination node).
@@@ -2133,7 -2071,7 +2135,7 @@@ struct batadv_orig_node *batadv_transta
/* found anything? */
if (best_entry)
orig_node = best_entry->orig_node;
- if (orig_node && !atomic_inc_not_zero(&orig_node->refcount))
+ if (orig_node && !kref_get_unless_zero(&orig_node->refcount))
orig_node = NULL;
rcu_read_unlock();
@@@ -2168,7 -2106,7 +2170,7 @@@ out
* because the XOR operation can combine them all while trying to reduce the
* noise as much as possible.
*
- * Returns the checksum of the global table of a given originator.
+ * Return: the checksum of the global table of a given originator.
*/
static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@@ -2245,7 -2183,7 +2247,7 @@@
* For details about the computation, please refer to the documentation for
* batadv_tt_global_crc().
*
- * Returns the checksum of the local table
+ * Return: the checksum of the local table
*/
static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
unsigned short vid)
@@@ -2351,7 -2289,7 +2353,7 @@@ static void batadv_tt_req_purge(struct
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node this request is being issued for
*
- * Returns the pointer to the new tt_req_node struct if no request
+ * Return: the pointer to the new tt_req_node struct if no request
* has already been issued for this orig_node, NULL otherwise.
*/
static struct batadv_tt_req_node *
@@@ -2386,7 -2324,7 +2388,7 @@@ unlock
* @entry_ptr: to be checked local tt entry
* @data_ptr: not used but definition required to satisfy the callback prototype
*
- * Returns 1 if the entry is a valid, 0 otherwise.
+ * Return: 1 if the entry is valid, 0 otherwise.
*/
static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
{
@@@ -2470,8 -2408,9 +2472,8 @@@ static void batadv_tt_tvlv_generate(str
* @orig_node: originator for which the CRCs have to be checked
* @tt_vlan: pointer to the first tvlv VLAN entry
* @num_vlan: number of tvlv VLAN entries
- * @create: if true, create VLAN objects if not found
*
- * Return true if all the received CRCs match the locally stored ones, false
+ * Return: true if all the received CRCs match the locally stored ones, false
* otherwise
*/
static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
@@@ -2574,8 -2513,6 +2576,8 @@@ static void batadv_tt_global_update_crc
* @num_vlan: number of tvlv VLAN entries
* @full_table: ask for the entire translation table if true, while only for the
* last TT diff otherwise
+ *
+ * Return: true if the TT Request was sent, false otherwise
*/
static int batadv_send_tt_request(struct batadv_priv *bat_priv,
struct batadv_orig_node *dst_orig_node,
@@@ -2656,7 -2593,7 +2658,7 @@@ out
* @req_src: mac address of tt request sender
* @req_dst: mac address of tt request recipient
*
- * Returns true if tt request reply was sent, false otherwise.
+ * Return: true if tt request reply was sent, false otherwise.
*/
static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
@@@ -2788,7 -2725,7 +2790,7 @@@ out
* @tt_data: tt data containing the tt request information
* @req_src: mac address of tt request sender
*
- * Returns true if tt request reply was sent, false otherwise.
+ * Return: true if tt request reply was sent, false otherwise.
*/
static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
@@@ -2906,7 -2843,7 +2908,7 @@@ out
* @req_src: mac address of tt request sender
* @req_dst: mac address of tt request recipient
*
- * Returns true if tt request reply was sent, false otherwise.
+ * Return: true if tt request reply was sent, false otherwise.
*/
static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
@@@ -3001,7 -2938,7 +3003,7 @@@ static void batadv_tt_update_changes(st
* @addr: the mac address of the client to check
* @vid: VLAN identifier
*
- * Returns true if the client is served by this node, false otherwise.
+ * Return: true if the client is served by this node, false otherwise.
*/
bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid)
@@@ -3118,16 -3055,11 +3120,16 @@@ static void batadv_tt_roam_purge(struc
spin_unlock_bh(&bat_priv->tt.roam_list_lock);
}
-/* This function checks whether the client already reached the
+/**
+ * batadv_tt_check_roam_count - check if a client has roamed too frequently
+ * @bat_priv: the bat priv with all the soft interface information
+ * @client: mac address of the roaming client
+ *
+ * This function checks whether the client already reached the
* maximum number of possible roaming phases. In this case the ROAMING_ADV
* will not be sent.
*
- * returns true if the ROAMING_ADV can be sent, false otherwise
+ * Return: true if the ROAMING_ADV can be sent, false otherwise
*/
static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
{
@@@ -3439,12 -3371,13 +3441,12 @@@ out
* batadv_tt_update_orig - update global translation table with new tt
* information received via ogms
* @bat_priv: the bat priv with all the soft interface information
- * @orig: the orig_node of the ogm
- * @tt_vlan: pointer to the first tvlv VLAN entry
+ * @orig_node: the orig_node of the ogm
+ * @tt_buff: pointer to the first tvlv VLAN entry
* @tt_num_vlan: number of tvlv VLAN entries
* @tt_change: pointer to the first entry in the TT buffer
* @tt_num_changes: number of tt changes inside the tt buffer
* @ttvn: translation table version number of this changeset
- * @tt_crc: crc32 checksum of orig node's translation table
*/
static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@@ -3526,7 -3459,7 +3528,7 @@@ request_table
* @addr: the mac address of the client to check
* @vid: VLAN identifier
*
- * Returns true if we know that the client has moved from its old originator
+ * Return: true if we know that the client has moved from its old originator
* to another one. This entry is still kept for consistency purposes and will be
* deleted later by a DEL or because of timeout
*/
@@@ -3552,7 -3485,7 +3554,7 @@@ out
* @addr: the mac address of the local client to query
* @vid: VLAN identifier
*
- * Returns true if the local client is known to be roaming (it is not served by
+ * Return: true if the local client is known to be roaming (it is not served by
* this node anymore) or not. If yes, the client is still present in the table
* to keep the latter consistent with the node TTVN
*/
@@@ -3681,7 -3614,7 +3683,7 @@@ static void batadv_tt_tvlv_ogm_handler_
* @tvlv_value: tvlv buffer containing the tt data
* @tvlv_value_len: tvlv buffer length
*
- * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
+ * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
* otherwise.
*/
static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@@ -3762,7 -3695,7 +3764,7 @@@
* @tvlv_value: tvlv buffer containing the tt data
* @tvlv_value_len: tvlv buffer length
*
- * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
+ * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
* otherwise.
*/
static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@@ -3808,7 -3741,7 +3810,7 @@@ out
* batadv_tt_init - initialise the translation table internals
* @bat_priv: the bat priv with all the soft interface information
*
- * Return 0 on success or negative error number in case of failure.
+ * Return: 0 on success or negative error number in case of failure.
*/
int batadv_tt_init(struct batadv_priv *bat_priv)
{
@@@ -3846,7 -3779,7 +3848,7 @@@
* @addr: the mac address of the client
* @vid: the identifier of the VLAN where this client is connected
*
- * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false
+ * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false
* otherwise
*/
bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
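
Besides the kref conversion, a second recurring shape in this file (and in batadv_gw_node_update() earlier) is the guarded unlink: hlist_del_init_rcu() re-initializes the node, so hlist_unhashed() reads true afterwards and the remove-plus-unref step becomes idempotent as long as both stay under the list lock. In outline, with placeholder names:

    spin_lock_bh(&list_lock);
    /* a racing caller that already unlinked the entry left it
     * "unhashed"; skip both the delete and the second reference
     * drop, so no double kref_put() of the list's reference */
    if (!hlist_unhashed(&entry->list)) {
        hlist_del_init_rcu(&entry->list);
        entry_free_ref(entry);
    }
    spin_unlock_bh(&list_lock);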
diff --combined net/bridge/br_mdb.c
index cf51b7b,74c278e..73786e2
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@@ -41,14 -41,6 +41,14 @@@ fail
return -EMSGSIZE;
}
+static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
+{
+ e->state = flags & MDB_PG_FLAGS_PERMANENT;
+ e->flags = 0;
+ if (flags & MDB_PG_FLAGS_OFFLOAD)
+ e->flags |= MDB_FLAGS_OFFLOAD;
+}
+
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
@@@ -88,41 -80,26 +88,41 @@@
for (pp = &mp->ports;
(p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
+ struct nlattr *nest_ent;
+ struct br_mdb_entry e;
+
port = p->port;
- if (port) {
- struct br_mdb_entry e;
- memset(&e, 0, sizeof(e));
- e.ifindex = port->dev->ifindex;
- e.state = p->state;
- e.vid = p->addr.vid;
- if (p->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = p->addr.u.ip4;
+ if (!port)
+ continue;
+
+ memset(&e, 0, sizeof(e));
+ e.ifindex = port->dev->ifindex;
+ e.vid = p->addr.vid;
+ __mdb_entry_fill_flags(&e, p->flags);
+ if (p->addr.proto == htons(ETH_P_IP))
+ e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- if (p->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = p->addr.u.ip6;
+ if (p->addr.proto == htons(ETH_P_IPV6))
+ e.addr.u.ip6 = p->addr.u.ip6;
#endif
- e.addr.proto = p->addr.proto;
- if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
+ e.addr.proto = p->addr.proto;
+ nest_ent = nla_nest_start(skb,
+ MDBA_MDB_ENTRY_INFO);
+ if (!nest_ent) {
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
}
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb,
+ MDBA_MDB_EATTR_TIMER,
+ br_timer_value(&p->timer))) {
+ nla_nest_cancel(skb, nest_ent);
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
+ }
+ nla_nest_end(skb, nest_ent);
}
nla_nest_end(skb, nest2);
skip:
@@@ -232,7 -209,7 +232,7 @@@ static inline size_t rtnl_mdb_nlmsg_siz
}
static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
- int type)
+ int type, struct net_bridge_port_group *pg)
{
struct switchdev_obj_port_mdb mdb = {
.obj = {
@@@ -255,13 -232,10 +255,13 @@@
#endif
mdb.obj.orig_dev = port_dev;
- if (port_dev && type == RTM_NEWMDB)
- switchdev_port_obj_add(port_dev, &mdb.obj);
- else if (port_dev && type == RTM_DELMDB)
+ if (port_dev && type == RTM_NEWMDB) {
+ err = switchdev_port_obj_add(port_dev, &mdb.obj);
+ if (!err && pg)
+ pg->flags |= MDB_PG_FLAGS_OFFLOAD;
+ } else if (port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
+ }
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
@@@ -279,21 -253,21 +279,21 @@@ errout
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type, u8 state)
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
+ int type)
{
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.ifindex = port->dev->ifindex;
- entry.addr.proto = group->proto;
- entry.addr.u.ip4 = group->u.ip4;
+ entry.ifindex = pg->port->dev->ifindex;
+ entry.addr.proto = pg->addr.proto;
+ entry.addr.u.ip4 = pg->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- entry.addr.u.ip6 = group->u.ip6;
+ entry.addr.u.ip6 = pg->addr.u.ip6;
#endif
- entry.state = state;
- entry.vid = group->vid;
- __br_mdb_notify(dev, &entry, type);
+ entry.vid = pg->addr.vid;
+ __mdb_entry_fill_flags(&entry, pg->flags);
+ __br_mdb_notify(dev, &entry, type, pg);
}
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
@@@ -438,8 -412,7 +438,8 @@@ static int br_mdb_parse(struct sk_buff
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, unsigned char state)
+ struct br_ip *group, unsigned char state,
+ struct net_bridge_port_group **pg)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
@@@ -452,8 -425,8 +452,8 @@@
mp = br_mdb_ip_get(mdb, group);
if (!mp) {
mp = br_multicast_new_group(br, port, group);
- err = PTR_ERR(mp);
- if (IS_ERR(mp))
+ err = PTR_ERR_OR_ZERO(mp);
+ if (err)
return err;
}
@@@ -470,7 -443,6 +470,7 @@@
if (unlikely(!p))
return -ENOMEM;
rcu_assign_pointer(*pp, p);
+ *pg = p;
if (state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@@ -478,8 -450,7 +478,8 @@@
}
static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct br_mdb_entry *entry)
+ struct br_mdb_entry *entry,
+ struct net_bridge_port_group **pg)
{
struct br_ip ip;
struct net_device *dev;
@@@ -508,7 -479,7 +508,7 @@@
#endif
spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, &ip, entry->state);
+ ret = br_mdb_add_group(br, p, &ip, entry->state, pg);
spin_unlock_bh(&br->multicast_lock);
return ret;
}
@@@ -516,7 -487,6 +516,7 @@@
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
+ struct net_bridge_port_group *pg;
struct net_bridge_vlan_group *vg;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
@@@ -546,15 -516,15 +546,15 @@@
if (br_vlan_enabled(br) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, entry, &pg);
if (err)
break;
- __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
}
} else {
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, entry, &pg);
if (!err)
- __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
}
return err;
@@@ -598,7 -568,7 +598,7 @@@ static int __br_mdb_del(struct net_brid
if (p->port->state == BR_STATE_DISABLED)
goto unlock;
- entry->state = p->state;
+ __mdb_entry_fill_flags(entry, p->flags);
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
@@@ -650,12 -620,12 +650,12 @@@ static int br_mdb_del(struct sk_buff *s
entry->vid = v->vid;
err = __br_mdb_del(br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB);
+ __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
}
} else {
err = __br_mdb_del(br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB);
+ __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
}
return err;
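
The dump-path rework turns MDBA_MDB_ENTRY_INFO into a nested attribute so new per-port-group data (here the membership timer) can travel next to the legacy struct: nla_put_nohdr() emits the raw br_mdb_entry first, where old binaries expect it, and typed attributes follow inside the nest. The skeleton of that netlink idiom, using `e` and `p` as in the hunk above:

    struct nlattr *nest;

    nest = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO);
    if (!nest)
        return -EMSGSIZE;    /* no tailroom left in the skb */

    /* headerless legacy payload first, then typed attributes */
    if (nla_put_nohdr(skb, sizeof(e), &e) ||
        nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, br_timer_value(&p->timer))) {
        nla_nest_cancel(skb, nest);    /* roll back the partial nest */
        return -EMSGSIZE;
    }
    nla_nest_end(skb, nest);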
diff --combined net/core/dev.c
index 3f4071a,0ef061b..edb7179
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@@ -3829,14 -3829,8 +3829,14 @@@ static void net_tx_action(struct softir
trace_consume_skb(skb);
else
trace_kfree_skb(skb, net_tx_action);
- __kfree_skb(skb);
+
+ if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ __kfree_skb(skb);
+ else
+ __kfree_skb_defer(skb);
}
+
+ __kfree_skb_flush();
}
if (sd->output_queue) {
@@@ -4160,10 -4154,7 +4160,10 @@@ ncls
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
drop:
- atomic_long_inc(&skb->dev->rx_dropped);
+ if (!deliver_exact)
+ atomic_long_inc(&skb->dev->rx_dropped);
+ else
+ atomic_long_inc(&skb->dev->rx_nohandler);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
@@@ -5161,7 -5152,6 +5161,7 @@@ static void net_rx_action(struct softir
}
}
+ __kfree_skb_flush();
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@@ -5389,12 -5379,12 +5389,12 @@@ void *netdev_lower_get_next(struct net_
{
struct netdev_adjacent *lower;
- lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+ lower = list_entry(*iter, struct netdev_adjacent, list);
if (&lower->list == &dev->adj_list.lower)
return NULL;
- *iter = &lower->list;
+ *iter = lower->list.next;
return lower->dev;
}
@@@ -7263,31 -7253,24 +7263,31 @@@ void netdev_run_todo(void
}
}
-/* Convert net_device_stats to rtnl_link_stats64. They have the same
- * fields in the same order, with only the type differing.
+/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
+ * all the same fields in the same order as net_device_stats, with only
+ * the type differing, but rtnl_link_stats64 may have additional fields
+ * at the end for newer counters.
*/
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
- BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+ BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
memcpy(stats64, netdev_stats, sizeof(*stats64));
+ /* zero out counters that only exist in rtnl_link_stats64 */
+ memset((char *)stats64 + sizeof(*netdev_stats), 0,
+ sizeof(*stats64) - sizeof(*netdev_stats));
#else
- size_t i, n = sizeof(*stats64) / sizeof(u64);
+ size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
const unsigned long *src = (const unsigned long *)netdev_stats;
u64 *dst = (u64 *)stats64;
- BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
- sizeof(*stats64) / sizeof(u64));
+ BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
dst[i] = src[i];
+ /* zero out counters that only exist in rtnl_link_stats64 */
+ memset((char *)stats64 + n * sizeof(u64), 0,
+ sizeof(*stats64) - n * sizeof(u64));
#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
@@@ -7317,7 -7300,6 +7317,7 @@@ struct rtnl_link_stats64 *dev_get_stats
}
storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@@ -7440,8 -7422,10 +7440,10 @@@ struct net_device *alloc_netdev_mqs(in
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
- if (!dev->tx_queue_len)
+ if (!dev->tx_queue_len) {
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->tx_queue_len = 1;
+ }
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
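
The netdev_stats_to_stats64() change only assumes that rtnl_link_stats64 starts with the same counters, in the same order, as net_device_stats and may then grow a tail of new ones. The copy-then-zero-the-tail idiom in plain, standalone C (hypothetical two-field structs for illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct stats_old { unsigned long rx, tx; };          /* legacy */
    struct stats64 { uint64_t rx, tx, rx_nohandler; };   /* superset */

    static void stats_to_stats64(struct stats64 *dst,
                                 const struct stats_old *src)
    {
        size_t i, n = sizeof(*src) / sizeof(unsigned long);
        const unsigned long *s = (const unsigned long *)src;
        uint64_t *d = (uint64_t *)dst;

        for (i = 0; i < n; i++)    /* widen the common counters */
            d[i] = s[i];
        /* zero counters that exist only in the 64-bit struct */
        memset((char *)dst + n * sizeof(uint64_t), 0,
               sizeof(*dst) - n * sizeof(uint64_t));
    }

    int main(void)
    {
        struct stats_old legacy = { 1, 2 };
        struct stats64 out;

        stats_to_stats64(&out, &legacy);
        printf("%llu %llu %llu\n", (unsigned long long)out.rx,
               (unsigned long long)out.tx,
               (unsigned long long)out.rx_nohandler);
        return 0;
    }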
diff --combined net/core/skbuff.c
index 8bd4b79,5bf88f5..488566b
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@@ -79,6 -79,8 +79,8 @@@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+ int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+ EXPORT_SYMBOL(sysctl_max_skb_frags);
/**
* skb_panic - private function for out-of-line support
@@@ -347,16 -349,8 +349,16 @@@ struct sk_buff *build_skb(void *data, u
}
EXPORT_SYMBOL(build_skb);
+#define NAPI_SKB_CACHE_SIZE 64
+
+struct napi_alloc_cache {
+ struct page_frag_cache page;
+ size_t skb_count;
+ void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
+
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
@@@ -386,9 -380,9 +388,9 @@@ EXPORT_SYMBOL(netdev_alloc_frag)
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
- struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return __alloc_page_frag(nc, fragsz, gfp_mask);
+ return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
}
void *napi_alloc_frag(unsigned int fragsz)
@@@ -482,7 -476,7 +484,7 @@@ EXPORT_SYMBOL(__netdev_alloc_skb)
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
gfp_t gfp_mask)
{
- struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
void *data;
@@@ -502,7 -496,7 +504,7 @@@
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- data = __alloc_page_frag(nc, len, gfp_mask);
+ data = __alloc_page_frag(&nc->page, len, gfp_mask);
if (unlikely(!data))
return NULL;
@@@ -513,7 -507,7 +515,7 @@@
}
/* use OR instead of assignment to avoid clearing of bits in mask */
- if (nc->pfmemalloc)
+ if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@@ -755,73 -749,6 +757,73 @@@ void consume_skb(struct sk_buff *skb
}
EXPORT_SYMBOL(consume_skb);
+void __kfree_skb_flush(void)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+ /* flush skb_cache if containing objects */
+ if (nc->skb_count) {
+ kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
+ nc->skb_cache);
+ nc->skb_count = 0;
+ }
+}
+
+static inline void _kfree_skb_defer(struct sk_buff *skb)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+ /* drop skb->head and call any destructors for packet */
+ skb_release_all(skb);
+
+ /* record skb to CPU local list */
+ nc->skb_cache[nc->skb_count++] = skb;
+
+#ifdef CONFIG_SLUB
+ /* SLUB writes into objects when freeing */
+ prefetchw(skb);
+#endif
+
+ /* flush skb_cache if it is filled */
+ if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
+ kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
+ nc->skb_cache);
+ nc->skb_count = 0;
+ }
+}
+void __kfree_skb_defer(struct sk_buff *skb)
+{
+ _kfree_skb_defer(skb);
+}
+
+void napi_consume_skb(struct sk_buff *skb, int budget)
+{
+ if (unlikely(!skb))
+ return;
+
+ /* if budget is 0 assume netpoll w/ IRQs disabled */
+ if (unlikely(!budget)) {
+ dev_consume_skb_irq(skb);
+ return;
+ }
+
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ /* if reaching here SKB is ready to free */
+ trace_consume_skb(skb);
+
+ /* if SKB is a clone, don't handle this case */
+ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+ __kfree_skb(skb);
+ return;
+ }
+
+ _kfree_skb_defer(skb);
+}
+EXPORT_SYMBOL(napi_consume_skb);
+
/* Make sure a field is enclosed inside headers_start/headers_end section */
#define CHECK_SKB_FIELD(field) \
BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
@@@ -3079,7 -3006,8 +3081,7 @@@ struct sk_buff *skb_segment(struct sk_b
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
- csum = !head_skb->encap_hdr_csum &&
- !!can_checksum_protocol(features, proto);
+ csum = !!can_checksum_protocol(features, proto);
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@@ -3172,15 -3100,13 +3174,15 @@@
if (nskb->len == len + doffset)
goto perform_csum_check;
- if (!sg && !nskb->remcsum_offload) {
- nskb->ip_summed = CHECKSUM_NONE;
- nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
- skb_put(nskb, len),
- len, 0);
+ if (!sg) {
+ if (!nskb->remcsum_offload)
+ nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum =
+ skb_copy_and_csum_bits(head_skb, offset,
+ skb_put(nskb, len),
+ len, 0);
SKB_GSO_CB(nskb)->csum_start =
- skb_headroom(nskb) + doffset;
+ skb_headroom(nskb) + doffset;
continue;
}
@@@ -3246,19 -3172,12 +3248,19 @@@ skip_fraglist
nskb->truesize += nskb->data_len;
perform_csum_check:
- if (!csum && !nskb->remcsum_offload) {
- nskb->csum = skb_checksum(nskb, doffset,
- nskb->len - doffset, 0);
- nskb->ip_summed = CHECKSUM_NONE;
+ if (!csum) {
+ if (skb_has_shared_frag(nskb)) {
+ err = __skb_linearize(nskb);
+ if (err)
+ goto err;
+ }
+ if (!nskb->remcsum_offload)
+ nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum =
+ skb_checksum(nskb, doffset,
+ nskb->len - doffset, 0);
SKB_GSO_CB(nskb)->csum_start =
- skb_headroom(nskb) + doffset;
+ skb_headroom(nskb) + doffset;
}
} while ((offset += len) < head_skb->len);
@@@ -4496,7 -4415,9 +4498,7 @@@ int skb_vlan_push(struct sk_buff *skb,
skb->mac_len += VLAN_HLEN;
__skb_pull(skb, offset);
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(skb->data
- + (2 * ETH_ALEN), VLAN_HLEN, 0));
+ skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
}
__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
return 0;
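
On the consumer side, napi_consume_skb() is meant to replace dev_kfree_skb()/consume_skb() in driver TX-completion paths: with a non-zero NAPI budget the skb head is parked in the per-CPU 64-slot cache and returned to the slab in one kmem_cache_free_bulk() call when __kfree_skb_flush() runs at the end of the softirq; with budget 0 it falls back to dev_consume_skb_irq() for netpoll. A hypothetical driver loop showing the intended call site (foo_ring and its fields are made up):

    struct foo_tx_buf { struct sk_buff *skb; };
    struct foo_ring {
        struct foo_tx_buf *bufs;
        unsigned int count, next_to_clean, next_to_use;
    };

    static void foo_clean_tx(struct foo_ring *ring, int budget)
    {
        while (ring->next_to_clean != ring->next_to_use) {
            struct sk_buff *skb = ring->bufs[ring->next_to_clean].skb;

            /* was dev_kfree_skb(skb); passing the budget lets the
             * core detect the zero-budget netpoll case itself */
            napi_consume_skb(skb, budget);
            ring->next_to_clean = (ring->next_to_clean + 1) % ring->count;
        }
    }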
diff --combined net/dccp/ipv4.c
index 1e0c600,902d606..b5672e5
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@@ -802,7 -802,7 +802,7 @@@ static int dccp_v4_rcv(struct sk_buff *
}
lookup:
- sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
@@@ -824,26 -824,26 +824,26 @@@
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v4_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
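
The DCCP receive fix here (mirrored for IPv6 in the next file) closes a listener refcount hole: req->rsk_listener keeps the socket alive only as long as the request does, and dccp_check_req() can free the request, so the listener must be pinned before that call and released on every exit path afterwards. The shape of the fix, with names as in the hunk above:

    sk = req->rsk_listener;
    sock_hold(sk);    /* pin sk: dccp_check_req() may drop the
                       * request's reference on the listener */
    nsk = dccp_check_req(sk, skb, req);
    if (!nsk) {
        reqsk_put(req);
        goto discard_and_relse;    /* path ends in sock_put(sk) */
    }
    if (nsk == sk)
        reqsk_put(req);    /* keep the hold: sk is processed below */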
diff --combined net/dccp/ipv6.c
index 45cbe85,b8608b7..4663a01
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@@ -668,7 -668,7 +668,7 @@@ static int dccp_v6_rcv(struct sk_buff *
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
lookup:
- sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb));
if (!sk) {
@@@ -691,26 -691,26 +691,26 @@@
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v6_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@@ -993,7 -993,7 +993,7 @@@ static struct proto dccp_v6_prot =
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v6_do_rcv,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
diff --combined net/ipv4/devinet.c
index 3d83531,f6303b1..29b8d3a
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@@ -1847,7 -1847,7 +1847,7 @@@ static int inet_netconf_get_devconf(str
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@@ -2185,8 -2185,6 +2185,8 @@@ static struct devinet_sysctl_table
"igmpv3_unsolicited_report_interval"),
DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
"ignore_routes_with_linkdown"),
+ DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
+ "drop_gratuitous_arp"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@@ -2194,8 -2192,6 +2194,8 @@@
"promote_secondaries"),
DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
"route_localnet"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
+ "drop_unicast_in_l2_multicast"),
},
};
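
The devinet one-liner is the classic errno-sign slip: in-kernel code reports failure as a negative errno, so `err = EINVAL` handed a positive 22 back up the netlink path instead of an error. Stated once as a convention:

    /* kernel convention: return 0 on success, -Exxx on failure */
    if (!tb[NETCONFA_IFINDEX])
        return -EINVAL;    /* not EINVAL: callers test for ret < 0 */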
diff --combined net/ipv4/inet_connection_sock.c
index 3d28c6d,6414891..d768230
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@@ -24,7 -24,6 +24,7 @@@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@@ -68,8 -67,7 +68,8 @@@ int inet_csk_bind_conflict(const struc
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@@ -91,153 -89,161 +91,153 @@@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflic
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
+ * We try to allocate an odd port (and leave even ports for connect())
*/
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+ bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+ int ret = 1, attempts = 5, port = snum;
+ int smallest_size = -1, smallest_port;
struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret, attempts = 5;
struct net *net = sock_net(sk);
- int smallest_size = -1, smallest_rover;
+ int i, low, high, attempt_half;
+ struct inet_bind_bucket *tb;
kuid_t uid = sock_i_uid(sk);
- int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
+ u32 remaining, offset;
- local_bh_disable();
- if (!snum) {
- int remaining, rover, low, high;
+ if (port) {
+have_port:
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->port == port)
+ goto tb_found;
+ goto tb_not_found;
+ }
again:
- inet_get_local_port_range(net, &low, &high);
- if (attempt_half) {
- int half = low + ((high - low) >> 1);
-
- if (attempt_half == 1)
- high = half;
- else
- low = half;
- }
- remaining = (high - low) + 1;
- smallest_rover = rover = prandom_u32() % remaining + low;
-
- smallest_size = -1;
- do {
- if (inet_is_local_reserved_port(net, rover))
- goto next_nolock;
- head = &hashinfo->bhash[inet_bhashfn(net, rover,
- hashinfo->bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == rover) {
- if (((tb->fastreuse > 0 &&
- sk->sk_reuse &&
- sk->sk_state != TCP_LISTEN) ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport &&
- uid_eq(tb->fastuid, uid))) &&
- (tb->num_owners < smallest_size || smallest_size == -1)) {
- smallest_size = tb->num_owners;
- smallest_rover = rover;
- }
- if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
- snum = rover;
- goto tb_found;
- }
- goto next;
+ attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
+other_half_scan:
+ inet_get_local_port_range(net, &low, &high);
+ high++; /* [32768, 60999] -> [32768, 61000[ */
+ if (high - low < 4)
+ attempt_half = 0;
+ if (attempt_half) {
+ int half = low + (((high - low) >> 2) << 1);
+
+ if (attempt_half == 1)
+ high = half;
+ else
+ low = half;
+ }
+ remaining = high - low;
+ if (likely(remaining > 1))
+ remaining &= ~1U;
+
+ offset = prandom_u32() % remaining;
+ /* __inet_hash_connect() favors ports having @low parity
+ * We do the opposite to not pollute connect() users.
+ */
+ offset |= 1U;
+ smallest_size = -1;
+ smallest_port = low; /* avoid compiler warning */
+
+other_parity_scan:
+ port = low + offset;
+ for (i = 0; i < remaining; i += 2, port += 2) {
+ if (unlikely(port >= high))
+ port -= remaining;
+ if (inet_is_local_reserved_port(net, port))
+ continue;
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->port == port) {
+ if (((tb->fastreuse > 0 && reuse) ||
+ (tb->fastreuseport > 0 &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
+ (tb->num_owners < smallest_size || smallest_size == -1)) {
+ smallest_size = tb->num_owners;
+ smallest_port = port;
}
- break;
- next:
- spin_unlock(&head->lock);
- next_nolock:
- if (++rover > high)
- rover = low;
- } while (--remaining > 0);
-
- /* Exhausted local port range during search? It is not
- * possible for us to be holding one of the bind hash
- * locks if this test triggers, because if 'remaining'
- * drops to zero, we broke out of the do/while loop at
- * the top level, not from the 'break;' statement.
- */
- ret = 1;
- if (remaining <= 0) {
- if (smallest_size != -1) {
- snum = smallest_rover;
- goto have_snum;
- }
- if (attempt_half == 1) {
- /* OK we now try the upper half of the range */
- attempt_half = 2;
- goto again;
+ if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
+ goto tb_found;
+ goto next_port;
}
- goto fail;
- }
- /* OK, here is the one we will use. HEAD is
- * non-NULL and we hold it's mutex.
- */
- snum = rover;
- } else {
-have_snum:
- head = &hashinfo->bhash[inet_bhashfn(net, snum,
- hashinfo->bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == snum)
- goto tb_found;
+ goto tb_not_found;
+next_port:
+ spin_unlock_bh(&head->lock);
+ cond_resched();
+ }
+
+ if (smallest_size != -1) {
+ port = smallest_port;
+ goto have_port;
}
- tb = NULL;
- goto tb_not_found;
+ offset--;
+ if (!(offset & 1))
+ goto other_parity_scan;
+
+ if (attempt_half == 1) {
+ /* OK we now try the upper half of the range */
+ attempt_half = 2;
+ goto other_half_scan;
+ }
+ return ret;
+
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port);
+ if (!tb)
+ goto fail_unlock;
tb_found:
if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
- if (((tb->fastreuse > 0 &&
- sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+ if (((tb->fastreuse > 0 && reuse) ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ smallest_size == -1)
goto success;
- } else {
- ret = 1;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
- if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size != -1 && --attempts >= 0) {
- spin_unlock(&head->lock);
- goto again;
- }
-
- goto fail_unlock;
+ if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
+ if ((reuse ||
+ (tb->fastreuseport > 0 &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
+ smallest_size != -1 && --attempts >= 0) {
+ spin_unlock_bh(&head->lock);
+ goto again;
}
+ goto fail_unlock;
}
- }
-tb_not_found:
- ret = 1;
- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
- net, head, snum)) == NULL)
- goto fail_unlock;
- if (hlist_empty(&tb->owners)) {
- if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
- tb->fastreuse = 1;
- else
+ if (!reuse)
tb->fastreuse = 0;
+ if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ tb->fastreuseport = 0;
+ } else {
+ tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = 1;
tb->fastuid = uid;
- } else
- tb->fastreuseport = 0;
- } else {
- if (tb->fastreuse &&
- (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
- tb->fastreuse = 0;
- if (tb->fastreuseport &&
- (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
+ } else {
tb->fastreuseport = 0;
+ }
}
success:
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, snum);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
ret = 0;
fail_unlock:
- spin_unlock(&head->lock);
-fail:
- local_bh_enable();
+ spin_unlock_bh(&head->lock);
return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);
@@@ -476,6 -482,10 +476,6 @@@ EXPORT_SYMBOL_GPL(inet_csk_route_child_
#define AF_INET_FAMILY(fam) true
#endif
-/* Only thing we need from tcp.h */
-extern int sysctl_tcp_synack_retries;
-
-
/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
const int max_retries,
@@@ -547,7 -557,6 +547,7 @@@ static void reqsk_timer_handler(unsigne
{
struct request_sock *req = (struct request_sock *)data;
struct sock *sk_listener = req->rsk_listener;
+ struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
int qlen, expire = 0, resend = 0;
@@@ -557,7 -566,7 +557,7 @@@
if (sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
- max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+ max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
thresh = max_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
@@@ -728,7 -737,6 +728,7 @@@ int inet_csk_listen_start(struct sock *
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@@ -746,14 -754,13 +746,14 @@@
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
- return 0;
+ if (likely(!err))
+ return 0;
}
sk->sk_state = TCP_CLOSE;
- return -EADDRINUSE;
+ return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
@@@ -782,14 -789,16 +782,16 @@@ static void inet_child_forget(struct so
reqsk_put(req);
}
- void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
- struct sock *child)
+ struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ struct request_sock *req,
+ struct sock *child)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock);
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_child_forget(sk, req, child);
+ child = NULL;
} else {
req->sk = child;
req->dl_next = NULL;
@@@ -801,6 -810,7 +803,7 @@@
sk_acceptq_added(sk);
}
spin_unlock(&queue->rskq_lock);
+ return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
@@@ -810,11 -820,8 +813,8 @@@ struct sock *inet_csk_complete_hashdanc
if (own_req) {
inet_csk_reqsk_queue_drop(sk, req);
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- inet_csk_reqsk_queue_add(sk, req, child);
- /* Warning: caller must not call reqsk_put(req);
- * child stole last reference on it.
- */
- return child;
+ if (inet_csk_reqsk_queue_add(sk, req, child))
+ return child;
}
/* Too bad, another child took ownership of the request, undo. */
bh_unlock_sock(child);
diff --combined net/ipv4/ip_gre.c
index 12071e2,41ba68d..202437d
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@@ -238,7 -238,7 +238,7 @@@ static int parse_gre_header(struct sk_b
return -EINVAL;
}
}
- return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+ return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
}
static void ipgre_err(struct sk_buff *skb, u32 info,
@@@ -440,17 -440,6 +440,17 @@@ drop
return 0;
}
+static __sum16 gre_checksum(struct sk_buff *skb)
+{
+ __wsum csum;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ csum = lco_csum(skb);
+ else
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ return csum_fold(csum);
+}
+
static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
__be16 proto, __be32 key, __be32 seq)
{
@@@ -478,7 -467,8 +478,7 @@@
!(skb_shinfo(skb)->gso_type &
(SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
*ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
+ *(__sum16 *)ptr = gre_checksum(skb);
}
}
}
@@@ -503,7 -493,8 +503,7 @@@ static void __gre_xmit(struct sk_buff *
static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
bool csum)
{
- return iptunnel_handle_offloads(skb, csum,
- csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+ return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
static struct rtable *gre_get_rt(struct sk_buff *skb,
@@@ -540,16 -531,9 +540,16 @@@ static void gre_fb_xmit(struct sk_buff
goto err_free_skb;
key = &tun_info->key;
- rt = gre_get_rt(skb, dev, &fl, key);
- if (IS_ERR(rt))
- goto err_free_skb;
+ rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
+ NULL;
+ if (!rt) {
+ rt = gre_get_rt(skb, dev, &fl, key);
+ if (IS_ERR(rt))
+ goto err_free_skb;
+ if (!skb->mark)
+ dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
+ fl.saddr);
+ }
tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
@@@ -1070,8 -1054,9 +1070,9 @@@ static const struct net_device_ops gre_
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->netdev_ops = &gre_tap_netdev_ops;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
@@@ -1256,6 -1241,14 +1257,14 @@@ struct net_device *gretap_fb_dev_create
err = ipgre_newlink(net, dev, tb, NULL);
if (err < 0)
goto out;
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
return dev;
out:
free_netdev(dev);
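The new gre_checksum() helper above folds either the local-checksum-offload value (lco_csum(), when the inner packet left its checksum at CHECKSUM_PARTIAL) or a full skb_checksum() into the 16-bit GRE checksum field. As a standalone model of the one's-complement sum and the csum_fold() step, with plain uint types standing in for the kernel's __wsum/__sum16:

	#include <stddef.h>
	#include <stdint.h>

	static uint16_t csum_fold32(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);	/* fold the carries once */
		sum = (sum & 0xffff) + (sum >> 16);	/* and any new carry */
		return (uint16_t)~sum;
	}

	static uint16_t inet_checksum(const uint8_t *data, size_t len)
	{
		uint32_t sum = 0;
		size_t i;

		for (i = 0; i + 1 < len; i += 2)
			sum += (uint32_t)(data[i] << 8 | data[i + 1]);
		if (len & 1)			/* pad an odd trailing byte */
			sum += (uint32_t)data[len - 1] << 8;
		return csum_fold32(sum);
	}

The CHECKSUM_PARTIAL branch is the cheap one: lco_csum() derives the outer checksum arithmetically from the inner packet's checksum state instead of walking the payload.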
diff --combined net/ipv4/ip_sockglue.c
index 3f1befc,a501242..035ad64
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@@ -249,6 -249,8 +249,8 @@@ int ip_cmsg_send(struct net *net, struc
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+ /* Our caller is responsible for freeing ipc->opt */
err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
err < 40 ? err : 40);
if (err)
@@@ -571,7 -573,6 +573,7 @@@ static int do_ip_setsockopt(struct soc
int optname, char __user *optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
int val = 0, err;
bool needs_rtnl = setsockopt_needs_rtnl(optname);
@@@ -911,7 -912,7 +913,7 @@@
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
kfree(msf);
err = -ENOBUFS;
break;
@@@ -1066,7 -1067,7 +1068,7 @@@
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sysctl_igmp_max_msf) {
+ gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
err = -ENOBUFS;
goto mc_msf_out;
}
@@@ -1341,13 -1342,10 +1343,13 @@@ static int do_ip_getsockopt(struct soc
val = inet->tos;
break;
case IP_TTL:
+ {
+ struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- sysctl_ip_default_ttl :
+ net->ipv4.sysctl_ip_default_ttl :
inet->uc_ttl);
break;
+ }
case IP_HDRINCL:
val = inet->hdrincl;
break;
diff --combined net/ipv4/ip_tunnel.c
index 4569da7,89e8861..dff8a05
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@@ -68,6 -68,61 +68,6 @@@ static unsigned int ip_tunnel_hash(__be
IP_TNL_HASH_BITS);
}
-static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
- struct dst_entry *dst, __be32 saddr)
-{
- struct dst_entry *old_dst;
-
- dst_clone(dst);
- old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
- dst_release(old_dst);
- idst->saddr = saddr;
-}
-
-static noinline void tunnel_dst_set(struct ip_tunnel *t,
- struct dst_entry *dst, __be32 saddr)
-{
- __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
-}
-
-static void tunnel_dst_reset(struct ip_tunnel *t)
-{
- tunnel_dst_set(t, NULL, 0);
-}
-
-void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
-{
- int i;
-
- for_each_possible_cpu(i)
- __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
-}
-EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-
-static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
- u32 cookie, __be32 *saddr)
-{
- struct ip_tunnel_dst *idst;
- struct dst_entry *dst;
-
- rcu_read_lock();
- idst = raw_cpu_ptr(t->dst_cache);
- dst = rcu_dereference(idst->dst);
- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
- dst = NULL;
- if (dst) {
- if (!dst->obsolete || dst->ops->check(dst, cookie)) {
- *saddr = idst->saddr;
- } else {
- tunnel_dst_reset(t);
- dst_release(dst);
- dst = NULL;
- }
- }
- rcu_read_unlock();
- return (struct rtable *)dst;
-}
-
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
{
@@@ -326,8 -381,7 +326,8 @@@ static int ip_tunnel_bind_dev(struct ne
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+ fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
@@@ -675,8 -729,7 +675,8 @@@ void ip_tunnel_xmit(struct sk_buff *skb
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
- rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
+ rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
+ NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@@ -686,8 -739,7 +686,8 @@@
goto tx_error;
}
if (connected)
- tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+ fl4.saddr);
}
if (rt->dst.dev == dev) {
@@@ -784,7 -836,7 +784,7 @@@ static void ip_tunnel_update(struct ip_
if (set_mtu)
dev->mtu = mtu;
}
- ip_tunnel_dst_reset_all(t);
+ dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
@@@ -891,17 -943,31 +891,31 @@@ done
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
- int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+ if (new_mtu < 68)
return -EINVAL;
+
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
dev->mtu = new_mtu;
return 0;
}
+ EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+ int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+ {
+ return __ip_tunnel_change_mtu(dev, new_mtu, true);
+ }
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
@@@ -909,7 -975,7 +923,7 @@@
struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells);
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@@ -1103,15 -1169,15 +1117,15 @@@ int ip_tunnel_init(struct net_device *d
if (!dev->tstats)
return -ENOMEM;
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
- if (!tunnel->dst_cache) {
+ err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (err) {
free_percpu(dev->tstats);
- return -ENOMEM;
+ return err;
}
err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) {
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
return err;
}
@@@ -1141,7 -1207,7 +1155,7 @@@ void ip_tunnel_uninit(struct net_devic
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(itn, netdev_priv(dev));
- ip_tunnel_dst_reset_all(tunnel);
+ dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
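Everything the deleted block above open-coded — per-cpu storage, refcount-safe get, invalidation — now lives behind the generic dst_cache API. A minimal sketch of the lifecycle using only the calls visible in this patch; struct my_tunnel and my_tunnel_route() are illustrative, not kernel symbols:

	struct my_tunnel {
		struct net *net;
		struct dst_cache dst_cache;	/* dst_cache_init() once, GFP_KERNEL */
	};

	static struct rtable *my_tunnel_route(struct my_tunnel *t,
					      struct flowi4 *fl4)
	{
		struct rtable *rt;

		rt = dst_cache_get_ip4(&t->dst_cache, &fl4->saddr);
		if (rt)
			return rt;		/* cached route still valid */

		rt = ip_route_output_key(t->net, fl4);
		if (!IS_ERR(rt))
			dst_cache_set_ip4(&t->dst_cache, &rt->dst, fl4->saddr);
		return rt;
	}

	/* on parameter change: dst_cache_reset(&t->dst_cache);
	 * on teardown:         dst_cache_destroy(&t->dst_cache); */

Note that dst_cache_get_ip4() hands back the cached source address as well, so a hit skips the source selection along with the route lookup.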
diff --combined net/ipv4/ping.c
index f6f93fc,d3a2716..76dce90
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@@ -145,12 -145,10 +145,12 @@@ fail
}
EXPORT_SYMBOL_GPL(ping_get_port);
-void ping_hash(struct sock *sk)
+int ping_hash(struct sock *sk)
{
pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
+
+ return 0;
}
void ping_unhash(struct sock *sk)
@@@ -748,8 -746,10 +748,10 @@@ static int ping_v4_sendmsg(struct sock
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
}
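The same leak fix recurs in raw_sendmsg() and udp_sendmsg() below: ip_cmsg_send() may have allocated ipc.opt even when it fails, so the caller now frees it on the error path. The shared shape, as a fragment rather than a complete function:

	err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
	if (unlikely(err)) {
		kfree(ipc.opt);	/* may be NULL; kfree(NULL) is a no-op */
		return err;	/* or goto out, as in raw_sendmsg() */
	}
	if (ipc.opt)
		free = 1;	/* remember to free it on the normal exit too */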
diff --combined net/ipv4/raw.c
index d635251,7113bae..8d22de7
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@@ -93,7 -93,7 +93,7 @@@ static struct raw_hashinfo raw_v4_hashi
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
-void raw_hash_sk(struct sock *sk)
+int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
@@@ -104,8 -104,6 +104,8 @@@
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(raw_hash_sk);
@@@ -549,8 -547,10 +549,10 @@@ static int raw_sendmsg(struct sock *sk
if (msg->msg_controllen) {
err = ip_cmsg_send(net, msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
goto out;
+ }
if (ipc.opt)
free = 1;
}
diff --combined net/ipv4/tcp.c
index f93150d,483ffdf..f9faadb
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@@ -282,6 -282,8 +282,6 @@@
#include <asm/unaligned.h>
#include <net/busy_poll.h>
-int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
-
int sysctl_tcp_min_tso_segs __read_mostly = 2;
int sysctl_tcp_autocorking __read_mostly = 1;
@@@ -404,7 -406,7 +404,7 @@@ void tcp_init_sock(struct sock *sk
tp->mss_cache = TCP_MSS_DEFAULT;
u64_stats_init(&tp->syncp);
- tp->reordering = sysctl_tcp_reordering;
+ tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
tcp_assign_congestion_control(sk);
@@@ -938,7 -940,7 +938,7 @@@ new_segment
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@@ -1211,7 -1213,7 +1211,7 @@@ new_segment
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == MAX_SKB_FRAGS || !sg) {
+ if (i == sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@@ -1464,10 -1466,8 +1464,10 @@@ static struct sk_buff *tcp_recv_skb(str
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ pr_err_once("%s: found a SYN, please report !\n", __func__);
offset--;
+ }
if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
*off = offset;
return skb;
@@@ -1657,10 -1657,8 +1657,10 @@@ int tcp_recvmsg(struct sock *sk, struc
break;
offset = *seq - TCP_SKB_CB(skb)->seq;
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ pr_err_once("%s: found a SYN, please report !\n", __func__);
offset--;
+ }
if (offset < skb->len)
goto found_ok_skb;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@@ -2328,7 -2326,6 +2328,7 @@@ static int do_tcp_setsockopt(struct soc
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
int val;
int err = 0;
@@@ -2525,7 -2522,7 +2525,7 @@@
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
- else if (val > sysctl_tcp_fin_timeout / HZ)
+ else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
tp->linger2 = 0;
else
tp->linger2 = val * HZ;
@@@ -2642,7 -2639,6 +2642,7 @@@ void tcp_get_info(struct sock *sk, stru
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
+ int notsent_bytes;
u64 rate64;
u32 rate;
@@@ -2723,11 -2719,6 +2723,11 @@@
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
info->tcpi_segs_out = tp->segs_out;
info->tcpi_segs_in = tp->segs_in;
+
+ notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
+ info->tcpi_notsent_bytes = max(0, notsent_bytes);
+
+ info->tcpi_min_rtt = tcp_min_rtt(tp);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@@ -2736,7 -2727,6 +2736,7 @@@ static int do_tcp_getsockopt(struct soc
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
int val, len;
if (get_user(len, optlen))
@@@ -2771,12 -2761,12 +2771,12 @@@
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@@ -2960,7 -2950,7 +2960,7 @@@ static void __tcp_alloc_md5sig_pool(voi
struct crypto_hash *hash;
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
+ if (IS_ERR(hash))
return;
per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
}
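A large share of this commit is the mechanical conversion of global TCP knobs (sysctl_tcp_fin_timeout, sysctl_tcp_syn_retries, ...) into per-network-namespace fields; the defaults are seeded in tcp_sk_init() further down in tcp_ipv4.c. The access pattern, written out as a hypothetical helper (callers in the patch open-code it):

	static int tcp_fin_timeout_of(const struct sock *sk)
	{
		return sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
	}

Reading through sock_net(sk) means each namespace sees the value configured inside it rather than a host-wide setting.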
diff --combined net/ipv4/tcp_input.c
index 5ee6fe0,3b2c8e9..e6e65f7
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@@ -80,7 -80,9 +80,7 @@@ int sysctl_tcp_timestamps __read_mostl
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
-int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
int sysctl_tcp_max_reordering __read_mostly = 300;
-EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@@ -124,10 -126,6 +124,10 @@@ int sysctl_tcp_invalid_ratelimit __read
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
+#define REXMIT_NONE 0 /* no loss recovery to do */
+#define REXMIT_LOST 1 /* retransmit packets marked lost */
+#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
+
/* Adapt the MSS value used to make delayed ack decision to the
* real world.
*/
@@@ -1212,7 -1210,6 +1212,7 @@@ static u8 tcp_sacktag_one(struct sock *
sacked |= TCPCB_SACKED_ACKED;
state->flag |= FLAG_DATA_SACKED;
tp->sacked_out += pcount;
+ tp->delivered += pcount; /* Out-of-order packets delivered */
fack_count += pcount;
@@@ -1824,12 -1821,8 +1824,12 @@@ static void tcp_check_reno_reordering(s
static void tcp_add_reno_sack(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+
tp->sacked_out++;
tcp_check_reno_reordering(sk, 0);
+ if (tp->sacked_out > prior_sacked)
+ tp->delivered++; /* Some out-of-order packet is delivered */
tcp_verify_left_out(tp);
}
@@@ -1841,7 -1834,6 +1841,7 @@@ static void tcp_remove_reno_sacks(struc
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
+ tp->delivered += max_t(int, acked - tp->sacked_out, 1);
if (acked - 1 >= tp->sacked_out)
tp->sacked_out = 0;
else
@@@ -1881,7 -1873,6 +1881,7 @@@ void tcp_enter_loss(struct sock *sk
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct sk_buff *skb;
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
@@@ -1932,9 -1923,9 +1932,9 @@@
* suggests that the degree of reordering is over-estimated.
*/
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= sysctl_tcp_reordering)
+ tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- sysctl_tcp_reordering);
+ net->ipv4.sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@@ -2118,7 -2109,6 +2118,7 @@@ static bool tcp_time_to_recover(struct
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out;
+ int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
/* Trick#1: The loss is proven. */
if (tp->lost_out)
@@@ -2133,7 -2123,7 +2133,7 @@@
*/
packets_out = tp->packets_out;
if (packets_out <= tp->reordering &&
- tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
+ tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
!tcp_may_send_now(sk)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
@@@ -2477,12 -2467,14 +2477,12 @@@ static void tcp_init_cwnd_reduction(str
tcp_ecn_queue_cwr(tp);
}
-static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
- int fast_rexmit, int flag)
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+ int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
int sndcnt = 0;
int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
- int newly_acked_sacked = prior_unsacked -
- (tp->packets_out - tp->sacked_out);
if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
return;
@@@ -2500,8 -2492,7 +2500,8 @@@
} else {
sndcnt = min(delta, newly_acked_sacked);
}
- sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+ /* Force a fast retransmit upon entering fast recovery */
+ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
@@@ -2546,7 -2537,7 +2546,7 @@@ static void tcp_try_keep_open(struct so
}
}
-static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
+static void tcp_try_to_open(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@@ -2560,6 -2551,8 +2560,6 @@@
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- } else {
- tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
}
}
@@@ -2669,8 -2662,7 +2669,8 @@@ static void tcp_enter_recovery(struct s
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+ int *rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
@@@ -2692,15 -2684,10 +2692,15 @@@
tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
- __tcp_push_pending_frames(sk, tcp_current_mss(sk),
- TCP_NAGLE_OFF);
- if (after(tp->snd_nxt, tp->high_seq))
- return; /* Step 2.b */
+ /* Step 2.b. Try to send new data (but deferred until cwnd
+ * is updated in tcp_ack()). Otherwise fall back to
+ * the conventional recovery.
+ */
+ if (tcp_send_head(sk) &&
+ after(tcp_wnd_end(tp), tp->snd_nxt)) {
+ *rexmit = REXMIT_NEW;
+ return;
+ }
tp->frto = 0;
}
}
@@@ -2719,11 -2706,12 +2719,11 @@@
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
- tcp_xmit_retransmit_queue(sk);
+ *rexmit = REXMIT_LOST;
}
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked,
- const int prior_unsacked, int flag)
+static bool tcp_try_undo_partial(struct sock *sk, const int acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@@ -2738,8 -2726,10 +2738,8 @@@
* can undo. Otherwise we clock out new packets but do not
* mark more packets lost or retransmit more.
*/
- if (tp->retrans_out) {
- tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
+ if (tp->retrans_out)
return true;
- }
if (!tcp_any_retrans_done(sk))
tp->retrans_stamp = 0;
@@@ -2758,21 -2748,21 +2758,21 @@@
* taking into account both packets sitting in receiver's buffer and
* packets lost by network.
*
- * Besides that it does CWND reduction, when packet loss is detected
- * and changes state of machine.
+ * Besides that it updates the congestion state when packet loss or ECN
+ * is detected. But it does not reduce the cwnd, it is done by the
+ * congestion control later.
*
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
- const int prior_unsacked,
- bool is_dupack, int flag)
+ bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- int fast_rexmit = 0;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
tp->sacked_out = 0;
@@@ -2819,10 -2809,8 +2819,10 @@@
/* Use RACK to detect loss */
if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
- tcp_rack_mark_lost(sk))
+ tcp_rack_mark_lost(sk)) {
flag |= FLAG_LOST_RETRANS;
+ *ack_flag |= FLAG_LOST_RETRANS;
+ }
/* E. Process state. */
switch (icsk->icsk_ca_state) {
@@@ -2831,7 -2819,7 +2831,7 @@@
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
+ if (tcp_try_undo_partial(sk, acked))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
@@@ -2843,7 -2831,7 +2843,7 @@@
}
break;
case TCP_CA_Loss:
- tcp_process_loss(sk, flag, is_dupack);
+ tcp_process_loss(sk, flag, is_dupack, rexmit);
if (icsk->icsk_ca_state != TCP_CA_Open &&
!(flag & FLAG_LOST_RETRANS))
return;
@@@ -2860,7 -2848,7 +2860,7 @@@
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk, flag)) {
- tcp_try_to_open(sk, flag, prior_unsacked);
+ tcp_try_to_open(sk, flag);
return;
}
@@@ -2882,7 -2870,8 +2882,7 @@@
if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
- tcp_xmit_retransmit_queue(sk);
+ *rexmit = REXMIT_LOST;
}
/* Kathleen Nichols' algorithm for tracking the minimum value of
@@@ -2907,7 -2896,10 +2907,10 @@@ static void tcp_update_rtt_min(struct s
{
const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+ struct rtt_meas rttm = {
+ .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+ .ts = now,
+ };
u32 elapsed;
/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
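The rtt_min update above is Kathleen Nichols' windowed min filter: keep the current minimum plus two staged runners-up drawn from later parts of the window, so the minimum can be replaced without rescanning history when it ages out. A standalone sketch of the full update step, with plain uint32_t standing in for the kernel types:

	#include <stdint.h>

	struct meas { uint32_t val; uint32_t ts; };

	static void win_min_update(struct meas m[3], uint32_t val,
				   uint32_t now, uint32_t wlen)
	{
		struct meas s = { .val = val, .ts = now };
		uint32_t elapsed = now - m[0].ts;

		/* Does the sample improve the 1st, 2nd or 3rd choice? */
		if (s.val <= m[0].val)
			m[0] = m[1] = m[2] = s;
		else if (s.val <= m[1].val)
			m[1] = m[2] = s;
		else if (s.val <= m[2].val)
			m[2] = s;

		if (elapsed > wlen) {
			/* Whole window passed with no new min: promote the
			 * runners-up, dropping any that expired too. */
			m[0] = m[1];
			m[1] = m[2];
			m[2] = s;
			if (now - m[0].ts > wlen) {
				m[0] = m[1];
				m[1] = s;
				if (now - m[0].ts > wlen)
					m[0] = s;
			}
		} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
			/* A quarter window with no better 2nd choice:
			 * take one from the second quartile. */
			m[2] = m[1] = s;
		} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
			/* Half the window passed: refresh the 3rd choice. */
			m[2] = s;
		}
	}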
@@@ -3104,7 -3096,7 +3107,7 @@@ static void tcp_ack_tstamp(struct sock
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una,
+ u32 prior_snd_una, int *acked,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@@ -3162,13 -3154,10 +3165,13 @@@
flag |= FLAG_ORIG_SACK_ACKED;
}
- if (sacked & TCPCB_SACKED_ACKED)
+ if (sacked & TCPCB_SACKED_ACKED) {
tp->sacked_out -= acked_pcount;
- else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb))
- tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+ } else if (tcp_is_sack(tp)) {
+ tp->delivered += acked_pcount;
+ if (!tcp_skb_spurious_retrans(tp, skb))
+ tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+ }
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@@ -3277,7 -3266,6 +3280,7 @@@
}
}
#endif
+ *acked = pkts_acked;
return flag;
}
@@@ -3311,36 -3299,21 +3314,36 @@@ static inline bool tcp_ack_is_dubious(c
/* Decide whether to run the increase function of congestion control. */
static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
- if (tcp_in_cwnd_reduction(sk))
- return false;
-
/* If reordering is high then always grow cwnd whenever data is
* delivered regardless of its ordering. Otherwise stay conservative
* and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
}
+/* The "ultimate" congestion control function that aims to replace the rigid
+ * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
+ * It's called toward the end of processing an ACK with precise rate
+ * information. All transmission or retransmission are delayed afterwards.
+ */
+static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
+ int flag)
+{
+ if (tcp_in_cwnd_reduction(sk)) {
+ /* Reduce cwnd if state mandates */
+ tcp_cwnd_reduction(sk, acked_sacked, flag);
+ } else if (tcp_may_raise_cwnd(sk, flag)) {
+ /* Advance cwnd if state allows */
+ tcp_cong_avoid(sk, ack, acked_sacked);
+ }
+ tcp_update_pacing_rate(sk);
+}
+
/* Check that window update is acceptable.
* The function assumes that snd_una<=ack<=snd_next.
*/
@@@ -3536,27 -3509,6 +3539,27 @@@ static inline void tcp_in_ack_event(str
icsk->icsk_ca_ops->in_ack_event(sk, flags);
}
+/* Congestion control has updated the cwnd already. So if we're in
+ * loss recovery then now we do any new sends (for FRTO) or
+ * retransmits (for CA_Loss or CA_recovery) that make sense.
+ */
+static void tcp_xmit_recovery(struct sock *sk, int rexmit)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (rexmit == REXMIT_NONE)
+ return;
+
+ if (unlikely(rexmit == 2)) {
+ __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+ TCP_NAGLE_OFF);
+ if (after(tp->snd_nxt, tp->high_seq))
+ return;
+ tp->frto = 0;
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@@@ -3569,9 -3521,8 +3572,9 @@@
bool is_dupack = false;
u32 prior_fackets;
int prior_packets = tp->packets_out;
- const int prior_unsacked = tp->packets_out - tp->sacked_out;
+ u32 prior_delivered = tp->delivered;
int acked = 0; /* Number of packets newly acked */
+ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
sack_state.first_sackt.v64 = 0;
@@@ -3660,16 -3611,23 +3663,16 @@@
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- acked = tp->packets_out;
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
- acked -= tp->packets_out;
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, prior_unsacked,
- is_dupack, flag);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
- /* Advance cwnd if state allows */
- if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked);
-
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
struct dst_entry *dst = __sk_dst_get(sk);
if (dst)
@@@ -3678,14 -3636,14 +3681,14 @@@
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
- tcp_update_pacing_rate(sk);
+ tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+ tcp_xmit_recovery(sk, rexmit);
return 1;
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, prior_unsacked,
- is_dupack, flag);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@@ -3708,8 -3666,8 +3711,8 @@@ old_ack
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, prior_unsacked,
- is_dupack, flag);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_xmit_recovery(sk, rexmit);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@@ -4040,7 -3998,7 +4043,7 @@@ void tcp_reset(struct sock *sk
*
* If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
*/
-static void tcp_fin(struct sock *sk)
+void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@@ -5554,9 -5512,6 +5557,9 @@@ static bool tcp_rcv_fastopen_synack(str
tp->syn_data_acked = tp->syn_data;
if (tp->syn_data_acked)
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+
+ tcp_fastopen_add_skb(sk, synack);
+
return false;
}
@@@ -6163,10 -6118,9 +6166,10 @@@ static bool tcp_syn_flood_action(const
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
bool want_cookie = false;
+ struct net *net = sock_net(sk);
#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies) {
+ if (net->ipv4.sysctl_tcp_syncookies) {
msg = "Sending cookies";
want_cookie = true;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@@ -6175,7 -6129,7 +6178,7 @@@
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
if (!queue->synflood_warned &&
- sysctl_tcp_syncookies != 2 &&
+ net->ipv4.sysctl_tcp_syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
@@@ -6208,7 -6162,6 +6211,7 @@@ int tcp_conn_request(struct request_soc
__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
struct dst_entry *dst = NULL;
struct request_sock *req;
@@@ -6219,7 -6172,7 +6222,7 @@@
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((sysctl_tcp_syncookies == 2 ||
+ if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
if (!want_cookie)
@@@ -6285,7 -6238,7 +6288,7 @@@
}
}
/* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
+ else if (!net->ipv4.sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst, false,
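Taken together, the tcp_input.c hunks above reorder the ACK path around the new tp->delivered counter. A condensed restatement (comments only, not compilable) of the sequence they establish in tcp_ack():

	/*
	 *	prior_delivered = tp->delivered;
	 *	flag |= tcp_clean_rtx_queue(sk, ..., &acked, &sack_state);
	 *	if (tcp_ack_is_dubious(sk, flag))
	 *		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
	 *	tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
	 *	tcp_xmit_recovery(sk, rexmit);
	 *
	 * Loss-state bookkeeping runs first, exactly one cwnd decision
	 * (reduction, growth, or neither) runs second in tcp_cong_control(),
	 * and any transmission deferred through REXMIT_LOST/REXMIT_NEW
	 * happens last.
	 */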
diff --combined net/ipv4/tcp_ipv4.c
index 3f872a6,487ac67..4c8d58d
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@@ -311,7 -311,7 +311,7 @@@ static void do_redirect(struct sk_buff
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
- void tcp_req_err(struct sock *sk, u32 seq)
+ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
struct request_sock *req = inet_reqsk(sk);
struct net *net = sock_net(sk);
@@@ -323,7 -323,7 +323,7 @@@
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
+ } else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
* There is no good way to pass the error to the newly
@@@ -383,7 -383,12 +383,12 @@@ void tcp_v4_err(struct sk_buff *icmp_sk
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq,
+ type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@@ -637,8 -642,8 +642,8 @@@ static void tcp_v4_send_reset(const str
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net,
- &tcp_hashinfo, ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+ ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb));
/* don't send rst if it can't find key */
@@@ -860,6 -865,7 +865,6 @@@ static void tcp_v4_reqsk_destructor(str
kfree(inet_rsk(req)->opt);
}
-
#ifdef CONFIG_TCP_MD5SIG
/*
* RFC2385 MD5 checksumming requires a mapping of
@@@ -1581,8 -1587,7 +1586,8 @@@ int tcp_v4_rcv(struct sk_buff *skb
TCP_SKB_CB(skb)->sacked = 0;
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ th->dest);
if (!sk)
goto no_tcp_socket;
@@@ -1592,28 -1597,30 +1597,30 @@@ process
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@@ -1696,8 -1703,7 +1703,8 @@@ do_time_wait
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
- &tcp_hashinfo,
+ &tcp_hashinfo, skb,
+ __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb));
@@@ -2389,16 -2395,6 +2396,16 @@@ static int __net_init tcp_sk_init(struc
net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
+ net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
+ net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
+ net->ipv4.sysctl_tcp_syncookies = 1;
+ net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
+ net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
+ net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
+ net->ipv4.sysctl_tcp_orphan_retries = 0;
+ net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+ net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+
return 0;
fail:
tcp_sk_exit(net);
diff --combined net/ipv4/udp.c
index 9fc4e9c,95d2f19..836abe5
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -356,8 -356,8 +356,8 @@@ EXPORT_SYMBOL(udp_lib_get_port)
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
+ bool match_wildcard)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@@ -848,20 -848,32 +848,20 @@@ void udp_set_csum(bool nocheck, struct
{
struct udphdr *uh = udp_hdr(skb);
- if (nocheck)
+ if (nocheck) {
uh->check = 0;
- else if (skb_is_gso(skb))
+ } else if (skb_is_gso(skb)) {
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features &
- (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ uh->check = 0;
+ uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v4_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp_set_csum);
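One detail of the rewritten udp_set_csum() worth spelling out: in UDP over IPv4 a header checksum of 0 means "no checksum", so a sum that genuinely folds to 0 must be sent as its one's-complement alias 0xffff — that is what CSUM_MANGLED_0 substitutes. As a one-line model (udp_wire_check() is an illustrative name):

	#include <stdint.h>

	static uint16_t udp_wire_check(uint16_t folded)
	{
		return folded == 0 ? 0xffff : folded;	/* CSUM_MANGLED_0 */
	}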
@@@ -1036,8 -1048,10 +1036,10 @@@ int udp_sendmsg(struct sock *sk, struc
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc,
sk->sk_family == AF_INET6);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
connected = 0;
diff --combined net/ipv6/addrconf.c
index ac0ba9e,bdd7eac..4751f89
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@@ -583,7 -583,7 +583,7 @@@ static int inet6_netconf_get_devconf(st
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@@ -3538,6 -3538,7 +3538,7 @@@ static void addrconf_dad_begin(struct i
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
+ bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@@ -3583,7 -3584,7 +3584,7 @@@
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ notify = true;
}
}
@@@ -3591,6 -3592,8 +3592,8 @@@
out:
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
+ if (notify)
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@@ -4711,8 -4714,6 +4714,8 @@@ static inline void ipv6_store_devconf(s
array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
/* we omit DEVCONF_STABLE_SECRET for now */
array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
+ array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
}
static inline size_t inet6_ifla6_size(void)
@@@ -5787,20 -5788,6 +5790,20 @@@ static struct addrconf_sysctl_tabl
.proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
},
{
+ .procname = "drop_unicast_in_l2_multicast",
+ .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "drop_unsolicited_na",
+ .data = &ipv6_devconf.drop_unsolicited_na,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
},
diff --combined net/ipv6/ip6_gre.c
index a94e506,a69aad1..f7c9560
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@@ -360,7 -360,7 +360,7 @@@ static void ip6gre_tunnel_uninit(struc
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
@@@ -633,7 -633,7 +633,7 @@@ static netdev_tx_t ip6gre_xmit2(struct
}
if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_get(tunnel);
+ dst = dst_cache_get(&tunnel->dst_cache);
if (!dst) {
dst = ip6_route_output(net, NULL, fl6);
@@@ -702,7 -702,7 +702,7 @@@
}
if (!fl6->flowi6_mark && ndst)
- ip6_tnl_dst_set(tunnel, ndst);
+ dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
proto = NEXTHDR_GRE;
@@@ -1009,7 -1009,7 +1009,7 @@@ static int ip6gre_tnl_change(struct ip6
t->parms.o_key = p->o_key;
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
ip6gre_tnl_link_config(t, set_mtu);
return 0;
}
@@@ -1219,7 -1219,7 +1219,7 @@@ static void ip6gre_dev_free(struct net_
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6_tnl_dst_destroy(t);
+ dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@@ -1257,7 -1257,7 +1257,7 @@@ static int ip6gre_tunnel_init_common(st
if (!dev->tstats)
return -ENOMEM;
- ret = ip6_tnl_dst_init(tunnel);
+ ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret) {
free_percpu(dev->tstats);
dev->tstats = NULL;
@@@ -1512,6 -1512,7 +1512,7 @@@ static void ip6gre_tap_setup(struct net
dev->destructor = ip6gre_dev_free;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
diff --combined net/ipv6/tcp_ipv6.c
index 9977b6f,5c8c842..33f2820
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@@ -327,6 -327,7 +327,7 @@@ static void tcp_v6_err(struct sk_buff *
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
+ bool fatal;
int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@@ -345,8 -346,9 +346,9 @@@
return;
}
seq = ntohl(th->seq);
+ fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq, fatal);
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@@ -400,7 -402,6 +402,6 @@@
goto out;
}
- icmpv6_err_convert(type, code, &err);
/* Might be for an request_sock */
switch (sk->sk_state) {
@@@ -866,8 -867,7 +867,8 @@@ static void tcp_v6_send_reset(const str
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, &ipv6h->saddr,
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
@@@ -1376,8 -1376,8 +1377,8 @@@ static int tcp_v6_rcv(struct sk_buff *s
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
- inet6_iif(skb));
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ th->source, th->dest, inet6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@@ -1387,7 -1387,7 +1388,7 @@@ process
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
@@@ -1395,24 -1395,24 +1396,24 @@@
reqsk_put(req);
goto discard_it;
}
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
tcp_v6_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@@ -1501,7 -1501,6 +1502,7 @@@ do_time_wait
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), tcp_v6_iif(skb));
@@@ -1867,7 -1866,7 +1868,7 @@@ struct proto tcpv6_prot =
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
diff --combined net/netfilter/nfnetlink.c
index 9a99f68,857ae89..2278d9a
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@@ -127,6 -127,13 +127,6 @@@ int nfnetlink_has_listeners(struct net
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask)
-{
- return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
-
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags)
{
@@@ -304,14 -311,14 +304,14 @@@ replay
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
@@@ -321,10 -328,12 +321,12 @@@
nlh = nlmsg_hdr(skb);
err = 0;
- if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
- skb->len < nlh->nlmsg_len) {
- err = -EINVAL;
- goto ack;
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+ nfnl_err_reset(&err_list);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
}
/* Only requests are handled by the kernel */
@@@ -399,7 -408,7 +401,7 @@@ ack
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
status |= NFNL_BATCH_FAILURE;
goto done;
}
diff --combined net/sched/sch_api.c
index c9673b5e,af1acf0..de1e176
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@@ -1841,7 -1841,7 +1841,7 @@@ reclassify
return err;
}
- return -1;
+ return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
if (unlikely(limit++ >= MAX_REC_LOOP)) {
@@@ -1852,6 -1852,7 +1852,7 @@@
}
tp = old_tp;
+ protocol = tc_skb_protocol(skb);
goto reclassify;
#endif
}
diff --combined net/sctp/socket.c
index 6427b9d,e878da0..b89501e
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@@ -5538,6 -5538,7 +5538,7 @@@ static int sctp_getsockopt_hmac_ident(s
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
+ int i;
if (!ep->auth_enable)
return -EACCES;
@@@ -5555,8 -5556,12 +5556,12 @@@
return -EFAULT;
if (put_user(num_idents, &p->shmac_num_idents))
return -EFAULT;
- if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
- return -EFAULT;
+ for (i = 0; i < num_idents; i++) {
+ __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+ if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+ return -EFAULT;
+ }
return 0;
}
@@@ -6101,10 -6106,9 +6106,10 @@@ static int sctp_getsockopt(struct sock
return retval;
}
-static void sctp_hash(struct sock *sk)
+static int sctp_hash(struct sock *sk)
{
/* STUB */
+ return 0;
}
static void sctp_unhash(struct sock *sk)
diff --combined net/tipc/link.c
index 3e513da,347cdc9..e31d92f
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@@ -123,6 -123,7 +123,6 @@@ struct tipc_stats
struct tipc_link {
u32 addr;
char name[TIPC_MAX_LINK_NAME];
- struct tipc_media_addr *media_addr;
struct net *net;
/* Management and link supervision data */
@@@ -903,10 -904,8 +903,10 @@@ int tipc_link_xmit(struct tipc_link *l
if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
return link_schedule_user(l, list);
}
- if (unlikely(msg_size(hdr) > mtu))
+ if (unlikely(msg_size(hdr) > mtu)) {
+ skb_queue_purge(list);
return -EMSGSIZE;
+ }
/* Prepare each packet for sending, and add to relevant queue: */
while (skb_queue_len(list)) {
@@@ -918,10 -917,8 +918,10 @@@
if (likely(skb_queue_len(transmq) < maxwin)) {
_skb = skb_clone(skb, GFP_ATOMIC);
- if (!_skb)
+ if (!_skb) {
+ skb_queue_purge(list);
return -ENOBUFS;
+ }
__skb_dequeue(list);
__skb_queue_tail(transmq, skb);
__skb_queue_tail(xmitq, _skb);
@@@ -1264,6 -1261,26 +1264,6 @@@ drop
return rc;
}
-/*
- * Send protocol message to the other endpoint.
- */
-static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ,
- int probe_msg, u32 gap, u32 tolerance,
- u32 priority)
-{
- struct sk_buff *skb = NULL;
- struct sk_buff_head xmitq;
-
- __skb_queue_head_init(&xmitq);
- tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
- tolerance, priority, &xmitq);
- skb = __skb_dequeue(&xmitq);
- if (!skb)
- return;
- tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr);
- l->rcv_unacked = 0;
-}
-
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq)
@@@ -1462,12 -1479,6 +1462,12 @@@ static int tipc_link_proto_rcv(struct t
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
l->tolerance = peers_tol;
+ if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI,
+ TIPC_MAX_LINK_PRI)) {
+ l->priority = peers_prio;
+ rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+
l->silent_intv_cnt = 0;
l->stats.recv_states++;
if (msg_probe(hdr))
@@@ -1962,8 -1973,10 +1962,10 @@@ int tipc_nl_add_bc_link(struct net *net
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_LINK_GET);
- if (!hdr)
+ if (!hdr) {
+ tipc_bcast_unlock(net);
return -EMSGSIZE;
+ }
attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
if (!attrs)
@@@ -2010,18 -2023,16 +2012,18 @@@ msg_full
return -EMSGSIZE;
}
-void tipc_link_set_tolerance(struct tipc_link *l, u32 tol)
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+ struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
}
-void tipc_link_set_prio(struct tipc_link *l, u32 prio)
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+ struct sk_buff_head *xmitq)
{
l->priority = prio;
- tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
}
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --combined net/tipc/node.c
index 10a1e87,9d7a16f..9fcc2fb
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@@ -346,12 -346,6 +346,6 @@@ struct tipc_node *tipc_node_create(stru
skb_queue_head_init(&n->bc_entry.inputq2);
for (i = 0; i < MAX_BEARERS; i++)
spin_lock_init(&n->links[i].lock);
- hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
- list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
- if (n->addr < temp_node->addr)
- break;
- }
- list_add_tail_rcu(&n->list, &temp_node->list);
n->state = SELF_DOWN_PEER_LEAVING;
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
@@@ -372,6 -366,12 +366,12 @@@
tipc_node_get(n);
setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
n->keepalive_intv = U32_MAX;
+ hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n->list, &temp_node->list);
exit:
spin_unlock_bh(&tn->node_list_lock);
return n;
@@@ -1166,7 -1166,7 +1166,7 @@@ msg_full
* @dnode: address of destination node
* @selector: a number used for deterministic link selection
* Consumes the buffer chain, except when returning -ELINKCONG
- * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
*/
int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
u32 dnode, int selector)
@@@ -1174,43 -1174,33 +1174,43 @@@
struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
- int bearer_id = -1;
- int rc = -EHOSTUNREACH;
+ int bearer_id;
+ int rc;
+
+ if (in_own_node(net, dnode)) {
+ tipc_sk_rcv(net, list);
+ return 0;
+ }
- __skb_queue_head_init(&xmitq);
n = tipc_node_find(net, dnode);
- if (likely(n)) {
- tipc_node_read_lock(n);
- bearer_id = n->active_links[selector & 1];
- if (bearer_id >= 0) {
- le = &n->links[bearer_id];
- spin_lock_bh(&le->lock);
- rc = tipc_link_xmit(le->link, list, &xmitq);
- spin_unlock_bh(&le->lock);
- }
+ if (unlikely(!n)) {
+ skb_queue_purge(list);
+ return -EHOSTUNREACH;
+ }
+
+ tipc_node_read_lock(n);
+ bearer_id = n->active_links[selector & 1];
+ if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
- if (likely(!rc))
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
- else if (rc == -ENOBUFS)
- tipc_node_link_down(n, bearer_id, false);
tipc_node_put(n);
- return rc;
+ skb_queue_purge(list);
+ return -EHOSTUNREACH;
}
- if (likely(in_own_node(net, dnode))) {
- tipc_sk_rcv(net, list);
- return 0;
- }
+ __skb_queue_head_init(&xmitq);
+ le = &n->links[bearer_id];
+ spin_lock_bh(&le->lock);
+ rc = tipc_link_xmit(le->link, list, &xmitq);
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(n);
+
+ if (likely(rc == 0))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ else if (rc == -ENOBUFS)
+ tipc_node_link_down(n, bearer_id, false);
+
+ tipc_node_put(n);
+
return rc;
}
@@@ -1647,12 -1637,9 +1647,12 @@@ int tipc_nl_node_set_link(struct sk_buf
char *name;
struct tipc_link *link;
struct tipc_node *node;
+ struct sk_buff_head xmitq;
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct net *net = sock_net(skb->sk);
+ __skb_queue_head_init(&xmitq);
+
if (!info->attrs[TIPC_NLA_LINK])
return -EINVAL;
@@@ -1696,13 -1683,13 +1696,13 @@@
u32 tol;
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
- tipc_link_set_tolerance(link, tol);
+ tipc_link_set_tolerance(link, tol, &xmitq);
}
if (props[TIPC_NLA_PROP_PRIO]) {
u32 prio;
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
- tipc_link_set_prio(link, prio);
+ tipc_link_set_prio(link, prio, &xmitq);
}
if (props[TIPC_NLA_PROP_WIN]) {
u32 win;
@@@ -1714,7 -1701,7 +1714,7 @@@
out:
tipc_node_read_unlock(node);
-
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr);
return res;
}
diff --combined net/unix/af_unix.c
index b374555,f75f847..8269da7
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@@ -1496,7 -1496,7 +1496,7 @@@ static void unix_detach_fds(struct scm_
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->fp[i]);
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
static void unix_destruct_scm(struct sk_buff *skb)
@@@ -1534,6 -1534,7 +1534,6 @@@ static int unix_attach_fds(struct scm_c
{
int i;
unsigned char max_level = 0;
- int unix_sock_count = 0;
if (too_many_unix_fds(current))
return -ETOOMANYREFS;
@@@ -1541,9 -1542,11 +1541,9 @@@
for (i = scm->fp->count - 1; i >= 0; i--) {
struct sock *sk = unix_get_socket(scm->fp->fp[i]);
- if (sk) {
- unix_sock_count++;
+ if (sk)
max_level = max(max_level,
unix_sk(sk)->recursion_level);
- }
}
if (unlikely(max_level > MAX_RECURSION_LEVEL))
return -ETOOMANYREFS;
@@@ -1558,7 -1561,7 +1558,7 @@@
return -ENOMEM;
for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
return max_level;
}
@@@ -1778,7 -1781,12 +1778,12 @@@ restart_locked
goto out_unlock;
}
- if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ /* other == sk && unix_peer(other) != sk if
+ * - unix_peer(sk) == NULL, destination address bound to sk
+ * - unix_peer(sk) == sk by time of get but disconnected before lock
+ */
+ if (other != sk &&
+ unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
@@@ -2274,13 -2282,15 +2279,15 @@@ static int unix_stream_read_generic(str
size_t size = state->size;
unsigned int last_len;
- err = -EINVAL;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+ err = -EINVAL;
goto out;
+ }
- err = -EOPNOTSUPP;
- if (flags & MSG_OOB)
+ if (unlikely(flags & MSG_OOB)) {
+ err = -EOPNOTSUPP;
goto out;
+ }
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
@@@ -2302,6 -2312,7 +2309,7 @@@
bool drop_skb;
struct sk_buff *skb, *last;
+ redo:
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD)) {
err = -ECONNRESET;
@@@ -2326,9 -2337,11 +2334,11 @@@ again
goto unlock;
unix_state_unlock(sk);
- err = -EAGAIN;
- if (!timeo)
+ if (!timeo) {
+ err = -EAGAIN;
break;
+ }
+
mutex_unlock(&u->readlock);
timeo = unix_stream_data_wait(sk, timeo, last,
@@@ -2341,7 -2354,7 +2351,7 @@@
}
mutex_lock(&u->readlock);
- continue;
+ goto redo;
unlock:
unix_state_unlock(sk);
break;
--
LinuxNextTracking