Hi,
over two years ago, I've added some tickets [1,2,3,4,5,6,7] about missing/not properly locked list checks for functions which create new list entries. Only Linus created a patch [8] and the other tickets were ignored.
I have therefore now created the minimal version of the patches.
Kind regards, Sven
[1] https://www.open-mesh.org/issues/235 [2] https://www.open-mesh.org/issues/236 [3] https://www.open-mesh.org/issues/237 [4] https://www.open-mesh.org/issues/238 [5] https://www.open-mesh.org/issues/239 [6] https://www.open-mesh.org/issues/240 [7] https://www.open-mesh.org/issues/241 [8] https://patchwork.open-mesh.org/patch/4919/
Sven Eckelmann (5): batman-adv: Prevent duplicated gateway_node entry batman-adv: Prevent duplicated nc_node entry batman-adv: Prevent duplicated softif_vlan entry batman-adv: Prevent duplicated global TT entry batman-adv: Prevent duplicated tvlv handler
net/batman-adv/gateway_client.c | 8 ++++++-- net/batman-adv/network-coding.c | 27 +++++++++++++++------------ net/batman-adv/soft-interface.c | 21 ++++++++++++--------- net/batman-adv/translation-table.c | 6 ++++-- net/batman-adv/tvlv.c | 8 ++++++-- 5 files changed, 43 insertions(+), 27 deletions(-)
The function batadv_gw_node_add is responsible for adding new gw_node to the gateway_list. It is expecting that the caller already checked that there is not already an entry with the same key or not.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: bc3538cabac5 ("batman-adv: adding gateway functionality") Signed-off-by: Sven Eckelmann sven@narfation.org --- net/batman-adv/gateway_client.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 8b198ee7..0df4e87d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -32,6 +32,7 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> @@ -355,6 +356,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node;
+ lockdep_assert_held(&bat_priv->gw.list_lock); + if (gateway->bandwidth_down == 0) return;
@@ -369,10 +372,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
- spin_lock_bh(&bat_priv->gw.list_lock); kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list); - spin_unlock_bh(&bat_priv->gw.list_lock);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n", @@ -428,11 +429,14 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node, *curr_gw = NULL;
+ spin_lock_bh(&bat_priv->gw.list_lock); gw_node = batadv_gw_node_get(bat_priv, orig_node); if (!gw_node) { batadv_gw_node_add(bat_priv, orig_node, gateway); + spin_unlock_bh(&bat_priv->gw.list_lock); goto out; } + spin_unlock_bh(&bat_priv->gw.list_lock);
if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) && gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
On Monday, 13 August 2018 03:04:41 HKT Sven Eckelmann wrote:
@@ -355,6 +356,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node;
lockdep_assert_held(&bat_priv->gw.list_lock);
if (gateway->bandwidth_down == 0) return;
@@ -369,10 +372,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
spin_lock_bh(&bat_priv->gw.list_lock); kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
spin_unlock_bh(&bat_priv->gw.list_lock);
Not mandatory but in other cases the function kernel doc received a hint that $some lock needs to be acquired before the function should be called.
Acked-by: Marek Lindner mareklindner@neomailbox.ch
Cheers, Marek
The function batadv_nc_get_nc_node is responsible for adding new nc_nodes to the in_coding_list and out_coding_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: 3ed7ada3f0bb ("batman-adv: network coding - detect coding nodes and remove these after timeout") Signed-off-by: Sven Eckelmann sven@narfation.org --- net/batman-adv/network-coding.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index c3578444..34caf129 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -854,16 +854,27 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ struct list_head *list;
+ /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + spin_lock_bh(lock); + /* Check if nc_node is already added */ nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
/* Node found */ if (nc_node) - return nc_node; + goto unlock;
nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); if (!nc_node) - return NULL; + goto unlock;
/* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); @@ -872,22 +883,14 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, kref_get(&orig_neigh_node->refcount); nc_node->orig_node = orig_neigh_node;
- /* Select ingoing or outgoing coding node */ - if (in_coding) { - lock = &orig_neigh_node->in_coding_list_lock; - list = &orig_neigh_node->in_coding_list; - } else { - lock = &orig_neigh_node->out_coding_list_lock; - list = &orig_neigh_node->out_coding_list; - } - batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", nc_node->addr, nc_node->orig_node->orig);
/* Add nc_node to orig_node */ - spin_lock_bh(lock); kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); + +unlock: spin_unlock_bh(lock);
return nc_node;
On Monday, 13 August 2018 03:04:42 HKT Sven Eckelmann wrote:
The function batadv_nc_get_nc_node is responsible for adding new nc_nodes to the in_coding_list and out_coding_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: 3ed7ada3f0bb ("batman-adv: network coding - detect coding nodes and remove these after timeout") Signed-off-by: Sven Eckelmann sven@narfation.org
Acked-by: Marek Lindner mareklindner@neomailbox.ch
Cheers, Marek
The function batadv_softif_vlan_get is responsible for adding new softif_vlan to the softif_vlan_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: 952cebb57518 ("batman-adv: add per VLAN interface attribute framework") Signed-off-by: Sven Eckelmann sven@narfation.org --- net/batman-adv/soft-interface.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1485263a..426a98bf 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -572,17 +572,21 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_softif_vlan *vlan; - int err; + + spin_lock_bh(&bat_priv->softif_vlan_list_lock);
vlan = batadv_softif_vlan_get(bat_priv, vid); if (vlan) { batadv_softif_vlan_put(vlan); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -EEXIST; }
vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); - if (!vlan) + if (!vlan) { + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -ENOMEM; + }
vlan->bat_priv = bat_priv; vlan->vid = vid; @@ -590,17 +594,16 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
atomic_set(&vlan->ap_isolation, 0);
- err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); - if (err) { - kfree(vlan); - return err; - } - - spin_lock_bh(&bat_priv->softif_vlan_list_lock); kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+ /* adding sysfs can fail but it is now to late to handle it. + * it can also not be in the spinlock section due to the sleeping + * behavior of the sysfs functions and the fs_reclaim lock + */ + batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); + /* add a new TT local entry. This one will be marked with the NOPURGE * flag */
On Monday, 13 August 2018 03:04:43 HKT Sven Eckelmann wrote:
/* adding sysfs can fail but it is now to late to handle it.
* it can also not be in the spinlock section due to the sleeping
* behavior of the sysfs functions and the fs_reclaim lock
*/
batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
/* add a new TT local entry. This one will be marked with the
NOPURGE * flag */
Why is it too late to handle the error ? As long as this function returns an error to the caller the kernel will destroy the interface, ultimately allowing batman to free anything related to this interface ?
Having debugged some nasty TT VLAN bugs which were due to missing error handling in the VLAN creation routine make me a little nervous about the proposed approach ...
Cheers, Marek
The function batadv_tt_global_orig_entry_add is responsible for adding new tt_orig_list_entry to the orig_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: c5eb5bb30321 ("batman-adv: add reference counting for type batadv_tt_orig_list_entry") Signed-off-by: Sven Eckelmann sven@narfation.org --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 12a2b7d2..d21624c4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1613,6 +1613,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, { struct batadv_tt_orig_list_entry *orig_entry;
+ spin_lock_bh(&tt_global->list_lock); + orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); if (orig_entry) { /* refresh the ttvn: the current value could be a bogus one that @@ -1635,11 +1637,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->flags = flags; kref_init(&orig_entry->refcount);
- spin_lock_bh(&tt_global->list_lock); kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); - spin_unlock_bh(&tt_global->list_lock); atomic_inc(&tt_global->orig_list_count);
sync_flags: @@ -1647,6 +1647,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, out: if (orig_entry) batadv_tt_orig_list_entry_put(orig_entry); + + spin_unlock_bh(&tt_global->list_lock); }
/**
On Monday, 13 August 2018 03:04:44 HKT Sven Eckelmann wrote:
The function batadv_tt_global_orig_entry_add is responsible for adding new tt_orig_list_entry to the orig_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: c5eb5bb30321 ("batman-adv: add reference counting for type batadv_tt_orig_list_entry") Signed-off-by: Sven Eckelmann sven@narfation.org
Acked-by: Marek Lindner mareklindner@neomailbox.ch
Cheers, Marek
The function batadv_tvlv_handler_register is responsible for adding new tvlv_handler to the handler_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: 0b6aa0d43767 ("batman-adv: tvlv - basic infrastructure") Signed-off-by: Sven Eckelmann sven@narfation.org --- net/batman-adv/tvlv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index a6374582..40e69c93 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -529,15 +529,20 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, { struct batadv_tvlv_handler *tvlv_handler;
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); if (tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); batadv_tvlv_handler_put(tvlv_handler); return; }
tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); - if (!tvlv_handler) + if (!tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); return; + }
tvlv_handler->ogm_handler = optr; tvlv_handler->unicast_handler = uptr; @@ -547,7 +552,6 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, kref_init(&tvlv_handler->refcount); INIT_HLIST_NODE(&tvlv_handler->list);
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock); kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
On Monday, 13 August 2018 03:04:45 HKT Sven Eckelmann wrote:
The function batadv_tvlv_handler_register is responsible for adding new tvlv_handler to the handler_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted.
But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation.
The check and the manipulation of the list must therefore be in the same locked code section.
Fixes: 0b6aa0d43767 ("batman-adv: tvlv - basic infrastructure") Signed-off-by: Sven Eckelmann sven@narfation.org
Acked-by: Marek Lindner mareklindner@neomailbox.ch
Cheers, Marek
b.a.t.m.a.n@lists.open-mesh.org