Hi,
I tried to check through the code and identify problems not yet mentioned by Paul E. McKenney. They were found by reading through Documentation/RCU/checklist.txt and related documents in the folder.
It doesn't address the reference counting problem for gw_nodes and interfaces. Those leaks happen in gw_election, get_batman_if_by_netdev and get_active_batman_if. We must increase the refcnt (using atomic_inc) inside the rcu_read_lock()..rcu_read_unlock() before we attach to the structure that "leaks" it. When another function later removes it from its usage context (primary_if, usage on stack, ...), then atomic_dec_and_test the refcnt. If it is decremented to zero then we can issue the call_rcu to the freeing function. So the "put" of those functions is not allowed inside an rcu_read_lock. As said before, the hold must always be called inside an rcu_read_lock.
Best regards, Sven
The update critical sections of if_list must be protected by a locking primitive other than RCU. The iterator must also be protected by the chosen locking mechanism.
The rtnl_lock in hardif_remove_interfaces must also be moved outside the iterator primitive to ensure that we don't deadlock the kernel due to differently nested locks in hardif_remove_interfaces and hard_if_event.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- batman-adv/hard-interface.c | 17 +++++++++++++++-- 1 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/batman-adv/hard-interface.c b/batman-adv/hard-interface.c index edbfddf..3cd7cb1 100644 --- a/batman-adv/hard-interface.c +++ b/batman-adv/hard-interface.c @@ -35,6 +35,9 @@
#define MIN(x, y) ((x) < (y) ? (x) : (y))
+/* protect update critical side of if_list - but not the content */ +static DEFINE_SPINLOCK(if_list_lock); + struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) { struct batman_if *batman_if; @@ -402,7 +405,11 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&batman_if->list);
check_known_mac_addr(batman_if->net_dev->dev_addr); + + spin_lock(&if_list_lock); list_add_tail_rcu(&batman_if->list, &if_list); + spin_unlock(&if_list_lock); + return batman_if;
free_if: @@ -430,6 +437,8 @@ static void hardif_remove_interface(struct batman_if *batman_if) return;
batman_if->if_status = IF_TO_BE_REMOVED; + + /* caller must take if_list_lock */ list_del_rcu(&batman_if->list); sysfs_del_hardif(&batman_if->hardif_obj); dev_put(batman_if->net_dev); @@ -440,11 +449,13 @@ void hardif_remove_interfaces(void) { struct batman_if *batman_if, *batman_if_tmp;
+ rtnl_lock(); + spin_lock(&if_list_lock); list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) { - rtnl_lock(); hardif_remove_interface(batman_if); - rtnl_unlock(); } + spin_unlock(&if_list_lock); + rtnl_unlock(); }
static int hard_if_event(struct notifier_block *this, @@ -469,7 +480,9 @@ static int hard_if_event(struct notifier_block *this, hardif_deactivate_interface(batman_if); break; case NETDEV_UNREGISTER: + spin_lock(&if_list_lock); hardif_remove_interface(batman_if); + spin_unlock(&if_list_lock); break; case NETDEV_CHANGEMTU: if (batman_if->soft_iface)
hlist_add_head_rcu must be protected using the gw_list_lock of the current bat_priv, as is already done for hlist_del_rcu.
It is important that this lock is now always done using spin_lock_irqsave because gw_node_add can also be called indirectly from parts of the kernel with interrupts disabled.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- batman-adv/gateway_client.c | 13 +++++++++---- 1 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/batman-adv/gateway_client.c b/batman-adv/gateway_client.c index 6721398..dd96d99 100644 --- a/batman-adv/gateway_client.c +++ b/batman-adv/gateway_client.c @@ -196,6 +196,7 @@ static void gw_node_add(struct bat_priv *bat_priv, { struct gw_node *gw_node; int down, up; + unsigned long flags;
gw_node = kmalloc(sizeof(struct gw_node), GFP_ATOMIC); if (!gw_node) @@ -205,7 +206,9 @@ static void gw_node_add(struct bat_priv *bat_priv, INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node;
+ spin_lock_irqsave(&bat_priv->gw_list_lock, flags); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); + spin_unlock_irqrestore(&bat_priv->gw_list_lock, flags);
gw_srv_class_to_kbit(new_gwflags, &down, &up); bat_dbg(DBG_BATMAN, bat_priv, @@ -273,8 +276,9 @@ void gw_node_purge_deleted(struct bat_priv *bat_priv) struct gw_node *gw_node; struct hlist_node *node, *node_tmp; unsigned long timeout = 2 * PURGE_TIMEOUT * HZ; + unsigned long flags;
- spin_lock(&bat_priv->gw_list_lock); + spin_lock_irqsave(&bat_priv->gw_list_lock, flags);
hlist_for_each_entry_safe(gw_node, node, node_tmp, &bat_priv->gw_list, list) { @@ -286,15 +290,16 @@ void gw_node_purge_deleted(struct bat_priv *bat_priv) } }
- spin_unlock(&bat_priv->gw_list_lock); + spin_unlock_irqrestore(&bat_priv->gw_list_lock, flags); }
void gw_node_list_free(struct bat_priv *bat_priv) { struct gw_node *gw_node; struct hlist_node *node, *node_tmp; + unsigned long flags;
- spin_lock(&bat_priv->gw_list_lock); + spin_lock_irqsave(&bat_priv->gw_list_lock, flags);
hlist_for_each_entry_safe(gw_node, node, node_tmp, &bat_priv->gw_list, list) { @@ -303,7 +308,7 @@ void gw_node_list_free(struct bat_priv *bat_priv) }
gw_deselect(bat_priv); - spin_unlock(&bat_priv->gw_list_lock); + spin_unlock_irqrestore(&bat_priv->gw_list_lock, flags); }
static int _write_buffer_text(struct bat_priv *bat_priv,
receive_bat_packet is not called with rcu_read_lock held, so we must ensure ourselves that we protect list_for_each_entry_rcu using the correct RCU locks.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- batman-adv/routing.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/batman-adv/routing.c b/batman-adv/routing.c index 603a932..1781b7e 100644 --- a/batman-adv/routing.c +++ b/batman-adv/routing.c @@ -575,6 +575,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, batman_packet->tq, batman_packet->ttl, batman_packet->version, has_directlink_flag);
+ rcu_read_lock(); list_for_each_entry_rcu(batman_if, &if_list, list) { if (batman_if->if_status != IF_ACTIVE) continue; @@ -597,6 +598,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, if (compare_orig(ethhdr->h_source, broadcast_addr)) is_broadcast = 1; } + rcu_read_unlock();
if (batman_packet->version != COMPAT_VERSION) { bat_dbg(DBG_BATMAN, bat_priv,
Regions which do not use RCU functions don't need to be protected by rcu_read_lock. If we want to protect data from being freed, then it must be covered by the same read-side critical section; otherwise the grace period may have already ended and the memory may have been freed before we call rcu_read_lock again.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- batman-adv/gateway_client.c | 4 ---- batman-adv/originator.c | 2 -- 2 files changed, 0 insertions(+), 6 deletions(-)
diff --git a/batman-adv/gateway_client.c b/batman-adv/gateway_client.c index dd96d99..bfac0ff 100644 --- a/batman-adv/gateway_client.c +++ b/batman-adv/gateway_client.c @@ -342,9 +342,7 @@ int gw_client_seq_print_text(struct seq_file *seq, void *offset) struct hlist_node *node; int gw_count = 0;
- rcu_read_lock(); if (!bat_priv->primary_if) { - rcu_read_unlock();
return seq_printf(seq, "BATMAN mesh %s disabled - please " "specify interfaces to enable it\n", @@ -352,7 +350,6 @@ int gw_client_seq_print_text(struct seq_file *seq, void *offset) }
if (bat_priv->primary_if->if_status != IF_ACTIVE) { - rcu_read_unlock();
return seq_printf(seq, "BATMAN mesh %s disabled - " "primary interface not active\n", @@ -365,7 +362,6 @@ int gw_client_seq_print_text(struct seq_file *seq, void *offset) "outgoingIF", SOURCE_VERSION, REVISION_VERSION_STR, bat_priv->primary_if->net_dev->name, bat_priv->primary_if->addr_str, net_dev->name); - rcu_read_unlock();
rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { diff --git a/batman-adv/originator.c b/batman-adv/originator.c index 2250266..3424ac2 100644 --- a/batman-adv/originator.c +++ b/batman-adv/originator.c @@ -335,7 +335,6 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) net_dev->name); }
- rcu_read_lock(); seq_printf(seq, "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%s (%s)]\n", SOURCE_VERSION, REVISION_VERSION_STR, bat_priv->primary_if->net_dev->name, @@ -343,7 +342,6 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", "outgoingIF", "Potential nexthops"); - rcu_read_unlock();
spin_lock_irqsave(&bat_priv->orig_hash_lock, flags);
Hi,
if anyone wants to start to track all usage cases of the batman_if and gw_node then he can use the functions defined in following two patches. The patches are not fixes, but they provide a starting point for the actual bugfix.
Best regards, Sven
Sven Eckelmann wrote:
Hi,
if anyone wants to start to track all usage cases of the batman_if and gw_node then he can use the functions defined in following two patches. The patches are not fixes, but they provide a starting point for the actual bugfix.
Please ignore
[PATCH 1/2] batman-adv: Use refcnt to track usage count of gw_node [PATCH 2/2] batman-adv: Use refcnt to track usage count of batman_if [PATCH 2/2] batman-adv: Use refcnt to track usage count of batman_if
The problem is that the refcnt check must be done after the grace period and not before. Otherwise we may think that nobody uses it anymore, but instead it is still referenced inside a rcu_read_lock...rcu_read_unlock and the refcnt will be updated a little bit later than the refcnt check is made.
rcu_read_lock | xyz_write_lock xyz_list_rcu { | xyz_list_del_rcu(x->list) .... | xyz_put(x) xyz_hold(x) | -> call_rcu(free_xyz) .... | rcu_read_unlock | | -> free_xyz(x)
I will repost the whole patch set later. The solution is to use synchronize_rcu instead of call_rcu and to call free_xyz directly. This solution is not possible if sleeping is not allowed in that situation or if it is relatively time critical.
Another way is to add a deleted flag and an extra spinlock. This spinlock must be used before "put"ting/holding an element in rcu_read_lock. So the reader side would need following code:
spin_lock(&x->lock); if (x->deleted) { be confused and dont use it as valid candidate } else { mark it as valid candidate and hold it } spin_unlock(&x->lock);
on the updater/writer side we must use something like that:
spin_lock(&x->lock); list_del_rcu(&x->list); x->deleted = 1; spin_unlock(&x->lock); put element
Best regards, Sven
gw_election may leak data from the rcu protected list of all gateway nodes outside the read-side critical section. This is not valid as we may free the data using a call_rcu created callback after we unlock using rcu_read_unlock. A workaround is to provide a reference count to be sure that the memory isn't freed too early.
It is currently only to implement the already existing functionality and doesn't provide the full tracking of all usage cases.
Additionally, we must gw_node_hold inside the rcu_read_lock()..rcu_read_unlock() before we attach to the structure which "leaks" it. When another function now removed it from its usage context (curr_gw, usage on stack, ...) then we must gw_node_put it. If it is decremented to zero then we can issue the call_rcu to the freeing function. So "put" is not allowed inside an rcu_read_lock.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- batman-adv/gateway_client.c | 19 +++++++++++++++++-- batman-adv/types.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/batman-adv/gateway_client.c b/batman-adv/gateway_client.c index bfac0ff..281da92 100644 --- a/batman-adv/gateway_client.c +++ b/batman-adv/gateway_client.c @@ -28,6 +28,19 @@ #include <linux/udp.h> #include <linux/if_vlan.h>
+static void gw_node_free(struct rcu_head *rcu); + +static void gw_node_hold(struct gw_node *gw_node) +{ + atomic_inc(&gw_node->refcnt); +} + +static void gw_node_put(struct gw_node *gw_node) +{ + if (atomic_dec_and_test(&gw_node->refcnt)) + call_rcu(&gw_node->rcu, gw_node_free); +} + void *gw_get_selected(struct bat_priv *bat_priv) { struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; @@ -205,6 +218,8 @@ static void gw_node_add(struct bat_priv *bat_priv, memset(gw_node, 0, sizeof(struct gw_node)); INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; + atomic_set(&gw_node->refcnt, 0); + gw_node_hold(gw_node);
spin_lock_irqsave(&bat_priv->gw_list_lock, flags); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); @@ -286,7 +301,7 @@ void gw_node_purge_deleted(struct bat_priv *bat_priv) (time_after(jiffies, gw_node->deleted + timeout))) {
hlist_del_rcu(&gw_node->list); - call_rcu(&gw_node->rcu, gw_node_free); + gw_node_put(gw_node); } }
@@ -304,7 +319,7 @@ void gw_node_list_free(struct bat_priv *bat_priv) hlist_for_each_entry_safe(gw_node, node, node_tmp, &bat_priv->gw_list, list) { hlist_del_rcu(&gw_node->list); - call_rcu(&gw_node->rcu, gw_node_free); + gw_node_put(gw_node); }
gw_deselect(bat_priv); diff --git a/batman-adv/types.h b/batman-adv/types.h index e7b53a4..a088064 100644 --- a/batman-adv/types.h +++ b/batman-adv/types.h @@ -96,6 +96,7 @@ struct gw_node { struct hlist_node list; struct orig_node *orig_node; unsigned long deleted; + atomic_t refcnt; struct rcu_head rcu; };
get_batman_if_by_netdev and get_active_batman_if may leak data from the rcu protected list of interfaces. The rcu protected list of all gateway nodes leaks the actual data outside the read-side critical section. This is not valid as we may free the data using a call_rcu created callback after we unlock using rcu_read_unlock. A workaround is to provide a reference count to be sure that the memory isn't freed too early.
It is currently only to implement the already existing functionality and doesn't provide the full tracking of all usage cases.
Additionally, we must hardif_hold inside the rcu_read_lock()..rcu_read_unlock() before we attach to the structure which "leaks" it. When another function now removed it from its usage context (primary_if, usage on stack, ...) then we must hardif_put it. If it is decremented to zero then we can issue the call_rcu to the freeing function. So "put" is not allowed inside an rcu_read_lock.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- batman-adv/hard-interface.c | 6 ++++-- batman-adv/hard-interface.h | 13 +++++++++++++ batman-adv/types.h | 1 + 3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/batman-adv/hard-interface.c b/batman-adv/hard-interface.c index 3cd7cb1..d7439e3 100644 --- a/batman-adv/hard-interface.c +++ b/batman-adv/hard-interface.c @@ -403,6 +403,8 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) batman_if->soft_iface = NULL; batman_if->if_status = IF_NOT_IN_USE; INIT_LIST_HEAD(&batman_if->list); + atomic_set(&batman_if->refcnt, 0); + hardif_hold(batman_if);
check_known_mac_addr(batman_if->net_dev->dev_addr);
@@ -420,7 +422,7 @@ out: return NULL; }
-static void hardif_free_interface(struct rcu_head *rcu) +void hardif_free_interface(struct rcu_head *rcu) { struct batman_if *batman_if = container_of(rcu, struct batman_if, rcu);
@@ -442,7 +444,7 @@ static void hardif_remove_interface(struct batman_if *batman_if) list_del_rcu(&batman_if->list); sysfs_del_hardif(&batman_if->hardif_obj); dev_put(batman_if->net_dev); - call_rcu(&batman_if->rcu, hardif_free_interface); + hardif_hold(batman_if); }
void hardif_remove_interfaces(void) diff --git a/batman-adv/hard-interface.h b/batman-adv/hard-interface.h index 4b49527..0f72b17 100644 --- a/batman-adv/hard-interface.h +++ b/batman-adv/hard-interface.h @@ -41,5 +41,18 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *orig_dev); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); +void hardif_free_interface(struct rcu_head *rcu); + +static inline void hardif_hold(struct batman_if *batman_if) +{ + atomic_inc(&batman_if->refcnt); +} + +static inline void hardif_put(struct batman_if *batman_if) +{ + if (atomic_dec_and_test(&batman_if->refcnt)) + call_rcu(&batman_if->rcu, hardif_free_interface); +} +
#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ diff --git a/batman-adv/types.h b/batman-adv/types.h index a088064..80880cf 100644 --- a/batman-adv/types.h +++ b/batman-adv/types.h @@ -44,6 +44,7 @@ struct batman_if { unsigned char *packet_buff; int packet_len; struct kobject *hardif_obj; + atomic_t refcnt; struct rcu_head rcu; struct packet_type batman_adv_ptype; struct net_device *soft_iface;
get_batman_if_by_netdev and get_active_batman_if may leak data from the rcu protected list of interfaces. The rcu protected list of all gateway nodes leaks the actual data outside the read-side critical section. This is not valid as we may free the data using a call_rcu created callback after we unlock using rcu_read_unlock. A workaround is to provide a reference count to be sure that the memory isn't freed too early.
It is currently only to implement the already existing functionality and doesn't provide the full tracking of all usage cases.
Additionally, we must hardif_hold inside the rcu_read_lock()..rcu_read_unlock() before we attach to the structure which "leaks" it. When another function now removed it from its usage context (primary_if, usage on stack, ...) then we must hardif_put it. If it is decremented to zero then we can issue the call_rcu to the freeing function. So "put" is not allowed inside an rcu_read_lock.
Signed-off-by: Sven Eckelmann sven.eckelmann@gmx.de --- Hmpf, small copy and paste failure (used hardif_hold instead of hardif_put).
batman-adv/hard-interface.c | 6 ++++-- batman-adv/hard-interface.h | 13 +++++++++++++ batman-adv/types.h | 1 + 3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/batman-adv/hard-interface.c b/batman-adv/hard-interface.c index 3cd7cb1..4513856 100644 --- a/batman-adv/hard-interface.c +++ b/batman-adv/hard-interface.c @@ -403,6 +403,8 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) batman_if->soft_iface = NULL; batman_if->if_status = IF_NOT_IN_USE; INIT_LIST_HEAD(&batman_if->list); + atomic_set(&batman_if->refcnt, 0); + hardif_hold(batman_if);
check_known_mac_addr(batman_if->net_dev->dev_addr);
@@ -420,7 +422,7 @@ out: return NULL; }
-static void hardif_free_interface(struct rcu_head *rcu) +void hardif_free_interface(struct rcu_head *rcu) { struct batman_if *batman_if = container_of(rcu, struct batman_if, rcu);
@@ -442,7 +444,7 @@ static void hardif_remove_interface(struct batman_if *batman_if) list_del_rcu(&batman_if->list); sysfs_del_hardif(&batman_if->hardif_obj); dev_put(batman_if->net_dev); - call_rcu(&batman_if->rcu, hardif_free_interface); + hardif_put(batman_if); }
void hardif_remove_interfaces(void) diff --git a/batman-adv/hard-interface.h b/batman-adv/hard-interface.h index 4b49527..0f72b17 100644 --- a/batman-adv/hard-interface.h +++ b/batman-adv/hard-interface.h @@ -41,5 +41,18 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *orig_dev); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); +void hardif_free_interface(struct rcu_head *rcu); + +static inline void hardif_hold(struct batman_if *batman_if) +{ + atomic_inc(&batman_if->refcnt); +} + +static inline void hardif_put(struct batman_if *batman_if) +{ + if (atomic_dec_and_test(&batman_if->refcnt)) + call_rcu(&batman_if->rcu, hardif_free_interface); +} +
#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ diff --git a/batman-adv/types.h b/batman-adv/types.h index a088064..80880cf 100644 --- a/batman-adv/types.h +++ b/batman-adv/types.h @@ -44,6 +44,7 @@ struct batman_if { unsigned char *packet_buff; int packet_len; struct kobject *hardif_obj; + atomic_t refcnt; struct rcu_head rcu; struct packet_type batman_adv_ptype; struct net_device *soft_iface;
b.a.t.m.a.n@lists.open-mesh.org