The description given in the commit is misleading and is at least not true for sysfs. The sysfs file structure should instead be deleted immediately on rtnl notifications and be underlying objects should be removed later when all kobject_put were done for the object.
This reverts commit 6b0485c758be ("batman-adv: Fix hardif remove/add race")
Signed-off-by: Sven Eckelmann sven@narfation.org --- v5: - add this revert --- net/batman-adv/debugfs.c | 23 ----------------------- net/batman-adv/debugfs.h | 1 - net/batman-adv/hard-interface.c | 19 ------------------- net/batman-adv/sysfs.c | 17 ----------------- net/batman-adv/sysfs.h | 2 -- 5 files changed, 62 deletions(-)
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index aea4133..1d68b6e 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -44,7 +44,6 @@ #include "translation-table.h"
static struct dentry *batadv_debugfs; -static atomic_t batadv_rename = ATOMIC_INIT(0);
static int batadv_algorithms_open(struct inode *inode, struct file *file) { @@ -348,28 +347,6 @@ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) } }
-/** - * batadv_debugfs_rename_hardif - rename the base directory - * @hard_iface: hard interface which is renamed. - * - * The interface may be removed and then quickly added back - * again. Rename the old instance to something temporary and unique, - * so avoiding a name space clash if it does reappear before the deferred - * work completes the removal. - */ -void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) -{ - char new_name[32]; - - snprintf(new_name, sizeof(*new_name) - 1, "tmp-%d", - atomic_inc_return(&batadv_rename)); - - if (batadv_debugfs && hard_iface->debug_dir) { - debugfs_rename(batadv_debugfs, hard_iface->debug_dir, - batadv_debugfs, new_name); - } -} - int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index e7d19c1..1ab4e2e 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -34,7 +34,6 @@ int batadv_debugfs_add_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); -void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
#else
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index ccc7641..1f90808 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -706,23 +706,6 @@ out: return NULL; }
-/** - * batadv_hardif_rename - rename the sysfs and debugfs - * @hard_iface: The hard interface to rename - * - * The sysfs and debugfs files cannot be removed until all users close - * them. So the removal is deferred into a work queue. This however - * means if the interface comes back before the work queue runs, the - * files are still there, and so the create gives an EEXISTS error. To - * avoid this, rename them to a tempory name. - */ -static void batadv_hardif_rename(struct batadv_hard_iface *hard_iface) -{ - batadv_sysfs_rename_hardif(&hard_iface->hardif_obj, - hard_iface->net_dev); - batadv_debugfs_rename_hardif(hard_iface); -} - static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) { ASSERT_RTNL(); @@ -736,8 +719,6 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) return;
hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - batadv_hardif_rename(hard_iface); - queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); }
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 9b2c28f..fe9ca94 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -48,8 +48,6 @@ #include "packet.h" #include "soft-interface.h"
-static atomic_t batadv_rename = ATOMIC_INIT(0); - static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) { struct device *dev = container_of(obj->parent, struct device, kobj); @@ -1048,21 +1046,6 @@ out: return -ENOMEM; }
-void batadv_sysfs_rename_hardif(struct kobject **hardif_obj, - struct net_device *dev) -{ - char new_name[32]; - int err; - - snprintf(new_name, sizeof(*new_name) - 1, "tmp-%d", - atomic_inc_return(&batadv_rename)); - - err = kobject_rename(*hardif_obj, new_name); - if (err) - batadv_err(dev, "Can't rename sysfs dir: %s/%s: %d\n", - dev->name, new_name, err); -} - void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { kobject_put(*hardif_obj); diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 64d3722..c76021b 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -48,8 +48,6 @@ void batadv_sysfs_del_meshif(struct net_device *dev); int batadv_sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev); void batadv_sysfs_del_hardif(struct kobject **hardif_obj); -void batadv_sysfs_rename_hardif(struct kobject **hardif_obj, - struct net_device *dev); int batadv_sysfs_add_vlan(struct net_device *dev, struct batadv_softif_vlan *vlan); void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
The legacy sysfs interface to modify interfaces belonging to batman-adv is run inside a region holding s_lock. And to add a net_device, it has to also get the rtnl_lock. This is exactly the other way around than in other virtual net_devices and conflicts with netdevice notifier which executes inside rtnl_lock.
The inverted lock situation is currently solved by executing the removal of netdevices via workqueue. The workqueue isn't executed inside rtnl_lock and thus can independently get the s_lock and the rtnl_lock.
But this workaround fails when the netdevice notifier creates events in quick succession and the earlier triggered removal of a net_device isn't processed in the workqueue before the adding of the new netdevice (with same name) event is issued.
Instead the legacy sysfs interface store events have to be enqueued in a workqueue to loose the s_lock. The worker is then free to get the required locks and the deadlock is avoided.
Signed-off-by: Sven Eckelmann sven@narfation.org --- v5: - rebase on top of current master v4: - rebase on top of current master v3: - rebased on top of current master to fix conflicts with newest patches v2: - rebased on top of current master to fix conflicts with newest patches --- net/batman-adv/sysfs.c | 107 +++++++++++++++++++++++++++++++++++++------------ net/batman-adv/types.h | 13 ++++++ 2 files changed, 94 insertions(+), 26 deletions(-)
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fe9ca94..8528959 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -37,6 +37,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/stringify.h> +#include <linux/workqueue.h>
#include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -828,31 +829,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, return length; }
-static ssize_t batadv_store_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) +/** + * batadv_store_mesh_iface_finish - store new hardif mesh_iface state + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @ifname: name of soft-interface to modify + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + * + * Return: 0 on success, 0 < on failure + */ +static int batadv_store_mesh_iface_finish(struct net_device *net_dev, + char ifname[IFNAMSIZ]) { - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct net *net = dev_net(net_dev); struct batadv_hard_iface *hard_iface; - int status_tmp = -1; - int ret = count; + int status_tmp; + int ret = 0; + + ASSERT_RTNL();
hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) - return count; - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (strlen(buff) >= IFNAMSIZ) { - pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", - buff); - batadv_hardif_put(hard_iface); - return -EINVAL; - } + return 0;
- if (strncmp(buff, "none", 4) == 0) + if (strncmp(ifname, "none", 4) == 0) status_tmp = BATADV_IF_NOT_IN_USE; else status_tmp = BATADV_IF_I_WANT_YOU; @@ -861,15 +862,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, goto out;
if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) + (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) goto out;
- rtnl_lock(); - if (status_tmp == BATADV_IF_NOT_IN_USE) { batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - goto unlock; + goto out; }
/* if the interface already is in use */ @@ -877,15 +876,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO);
- ret = batadv_hardif_enable_interface(hard_iface, net, buff); - -unlock: - rtnl_unlock(); + ret = batadv_hardif_enable_interface(hard_iface, net, ifname); out: batadv_hardif_put(hard_iface); return ret; }
+/** + * batadv_store_mesh_iface_work - store new hardif mesh_iface state + * @work: work queue item + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + */ +static void batadv_store_mesh_iface_work(struct work_struct *work) +{ + struct batadv_store_mesh_work *store_work; + int ret; + + store_work = container_of(work, struct batadv_store_mesh_work, work); + + rtnl_lock(); + ret = batadv_store_mesh_iface_finish(store_work->net_dev, + store_work->soft_iface_name); + rtnl_unlock(); + + if (ret < 0) + pr_err("Failed to store new mesh_iface state %s for %s: %d\n", + store_work->soft_iface_name, store_work->net_dev->name, + ret); + + dev_put(store_work->net_dev); + kfree(store_work); +} + +static ssize_t batadv_store_mesh_iface(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_store_mesh_work *store_work; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strlen(buff) >= IFNAMSIZ) { + pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", + buff); + return -EINVAL; + } + + store_work = kmalloc(sizeof(*store_work), GFP_KERNEL); + if (!store_work) + return -ENOMEM; + + dev_hold(net_dev); + INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); + store_work->net_dev = net_dev; + strlcpy(store_work->soft_iface_name, buff, + sizeof(store_work->soft_iface_name)); + + queue_work(batadv_event_workqueue, &store_work->work); + + return count; +} + static ssize_t batadv_show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 0eeb68f..81de9be 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1562,4 +1562,17 @@ enum batadv_tvlv_handler_flags { BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), };
+/** + * struct batadv_store_mesh_work - Work queue item to detach add/del interface + * from sysfs locks + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @soft_iface_name: name of soft-interface to modify + * @work: work queue item + */ +struct batadv_store_mesh_work { + struct net_device *net_dev; + char soft_iface_name[IFNAMSIZ]; + struct work_struct work; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */
On Monday, June 13, 2016 07:41:30 Sven Eckelmann wrote:
The legacy sysfs interface to modify interfaces belonging to batman-adv is run inside a region holding s_lock. And to add a net_device, it has to also get the rtnl_lock. This is exactly the other way around than in other virtual net_devices and conflicts with netdevice notifier which executes inside rtnl_lock.
The inverted lock situation is currently solved by executing the removal of netdevices via workqueue. The workqueue isn't executed inside rtnl_lock and thus can independently get the s_lock and the rtnl_lock.
But this workaround fails when the netdevice notifier creates events in quick succession and the earlier triggered removal of a net_device isn't processed in the workqueue before the adding of the new netdevice (with same name) event is issued.
Instead the legacy sysfs interface store events have to be enqueued in a workqueue to loose the s_lock. The worker is then free to get the required locks and the deadlock is avoided.
Signed-off-by: Sven Eckelmann sven@narfation.org
v5:
- rebase on top of current master
v4:
- rebase on top of current master
v3:
- rebased on top of current master to fix conflicts with newest patches
v2:
- rebased on top of current master to fix conflicts with newest patches
net/batman-adv/sysfs.c | 107 +++++++++++++++++++++++++++++++++++++------------ net/batman-adv/types.h | 13 ++++++ 2 files changed, 94 insertions(+), 26 deletions(-)
Applied in revision a1f0a80.
Thanks, Marek
Postponing the removal of the interface breaks the expected behavior of NETDEV_UNREGISTER and NETDEV_PRE_TYPE_CHANGE. This is especially problematic when an interface is removed and added in quick succession.
This reverts commit a33c882c1069 ("batman-adv: postpone sysfs removal when unregistering").
Signed-off-by: Sven Eckelmann sven@narfation.org --- v5: - rebase on top of current master v4: - rebase on top of current master v3: - rebased on top of current master to fix conflicts with newest patches v2: - rebased on top of current master to fix conflicts with newest patches - adjust commit message to use 12-char commit id + commit subject to reference reverted commit --- net/batman-adv/hard-interface.c | 26 +++--------------------- net/batman-adv/soft-interface.c | 44 ++++++++++++----------------------------- net/batman-adv/types.h | 4 ---- 3 files changed, 16 insertions(+), 58 deletions(-)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 1f90808..714af8e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -35,7 +35,6 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/workqueue.h>
#include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -625,25 +624,6 @@ out: batadv_hardif_put(primary_if); }
-/** - * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif - * @work: work queue item - * - * Free the parts of the hard interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_hardif_remove_interface_finish(struct work_struct *work) -{ - struct batadv_hard_iface *hard_iface; - - hard_iface = container_of(work, struct batadv_hard_iface, - cleanup_work); - - batadv_debugfs_del_hardif(hard_iface); - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_put(hard_iface); -} - static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -676,8 +656,6 @@ batadv_hardif_add_interface(struct net_device *net_dev)
INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); - INIT_WORK(&hard_iface->cleanup_work, - batadv_hardif_remove_interface_finish);
spin_lock_init(&hard_iface->neigh_list_lock);
@@ -719,7 +697,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) return;
hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); + batadv_debugfs_del_hardif(hard_iface); + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_put(hard_iface); }
void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7527c06..216ac03 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -39,6 +39,7 @@ #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/socket.h> @@ -46,7 +47,6 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> -#include <linux/workqueue.h>
#include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -747,34 +747,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) }
/** - * batadv_softif_destroy_finish - cleans up the remains of a softif - * @work: work queue item - * - * Free the parts of the soft interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_softif_destroy_finish(struct work_struct *work) -{ - struct batadv_softif_vlan *vlan; - struct batadv_priv *bat_priv; - struct net_device *soft_iface; - - bat_priv = container_of(work, struct batadv_priv, - cleanup_work); - soft_iface = bat_priv->soft_iface; - - /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); - if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); - } - - batadv_sysfs_del_meshif(soft_iface); - unregister_netdev(soft_iface); -} - -/** * batadv_softif_init_late - late stage initialization of soft interface * @dev: registered network device to modify * @@ -791,7 +763,6 @@ static int batadv_softif_init_late(struct net_device *dev)
bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; - INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
/* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -1028,8 +999,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_softif_vlan *vlan; + + ASSERT_RTNL(); + + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + }
- queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice(soft_iface); }
/** diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 81de9be..fb445c5 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -132,7 +132,6 @@ struct batadv_hard_iface_bat_v { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: per hard-interface B.A.T.M.A.N. IV data * @bat_v: per hard-interface B.A.T.M.A.N. V data - * @cleanup_work: work queue callback item for hard-interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs * @neigh_list: list of unique single hop neighbors via this interface * @neigh_list_lock: lock protecting neigh_list @@ -152,7 +151,6 @@ struct batadv_hard_iface { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hard_iface_bat_v bat_v; #endif - struct work_struct cleanup_work; struct dentry *debug_dir; struct hlist_head neigh_list; /* neigh_list_lock protects: neigh_list */ @@ -1013,7 +1011,6 @@ struct batadv_priv_bat_v { * @forw_bcast_list_lock: lock protecting forw_bcast_list * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging - * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface * @algo_ops: routing algorithm used by this mesh interface @@ -1072,7 +1069,6 @@ struct batadv_priv { spinlock_t tp_list_lock; /* protects tp_list */ atomic_t tp_num; struct delayed_work orig_work; - struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list;
On Monday, June 13, 2016 07:41:31 Sven Eckelmann wrote:
Postponing the removal of the interface breaks the expected behavior of NETDEV_UNREGISTER and NETDEV_PRE_TYPE_CHANGE. This is especially problematic when an interface is removed and added in quick succession.
This reverts commit a33c882c1069 ("batman-adv: postpone sysfs removal when unregistering").
Signed-off-by: Sven Eckelmann sven@narfation.org
v5:
- rebase on top of current master
v4:
- rebase on top of current master
v3:
- rebased on top of current master to fix conflicts with newest patches
v2:
- rebased on top of current master to fix conflicts with newest patches
- adjust commit message to use 12-char commit id + commit subject to reference reverted commit
net/batman-adv/hard-interface.c | 26 +++--------------------- net/batman-adv/soft-interface.c | 44 ++++++++++++----------------------------- net/batman-adv/types.h | 4 ---- 3 files changed, 16 insertions(+), 58 deletions(-)
Applied in revision 0994697.
Thanks, Marek
The kobject_put is only removing the sysfs entry and corresponding entries when its reference counter becomes zero. This tends to lead to collisions when a device is moved between two different network namespaces because some of the sysfs files have to be removed first and then added again to the already moved sysfs entry.
WARNING: CPU: 0 PID: 290 at lib/kobject.c:240 kobject_add_internal+0x5ec/0x8a0 kobject_add_internal failed for batman_adv with -EEXIST, don't try to register things with the same name in the same directory.
But the caller of kobject_put can already remove the sysfs entry before it does the kobject_put. This removal is done even when the reference counter is not yet zero and thus avoids the problem.
Signed-off-by: Sven Eckelmann sven@narfation.org --- v5: - rebase on top of current master v4: - rebase on top of current master - avoid deletes of vlan->kobj when it is a pointer to bat_priv->mesh_obj (happens for untagged vlan and can cause some kernfs warnings) v3: - rebased on top of current master to fix conflicts with newest patches v2: - rebased on top of current master to fix conflicts with newest patches --- net/batman-adv/sysfs.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 8528959..4e06cb7 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -713,6 +713,8 @@ rem_attr: for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
+ kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; out: @@ -727,6 +729,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev) for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
+ kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; } @@ -782,6 +786,10 @@ rem_attr: for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
+ if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; out: @@ -801,6 +809,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
+ if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; } @@ -1103,6 +1115,8 @@ out:
void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { + kobject_uevent(*hardif_obj, KOBJ_REMOVE); + kobject_del(*hardif_obj); kobject_put(*hardif_obj); *hardif_obj = NULL; }
On Monday, June 13, 2016 07:41:32 Sven Eckelmann wrote:
The kobject_put is only removing the sysfs entry and corresponding entries when its reference counter becomes zero. This tends to lead to collisions when a device is moved between two different network namespaces because some of the sysfs files have to be removed first and then added again to the already moved sysfs entry.
WARNING: CPU: 0 PID: 290 at lib/kobject.c:240
kobject_add_internal+0x5ec/0x8a0 kobject_add_internal failed for batman_adv with -EEXIST, don't try to register things with the same name in the same directory.
But the caller of kobject_put can already remove the sysfs entry before it does the kobject_put. This removal is done even when the reference counter is not yet zero and thus avoids the problem.
Signed-off-by: Sven Eckelmann sven@narfation.org
v5:
- rebase on top of current master
v4:
- rebase on top of current master
- avoid deletes of vlan->kobj when it is a pointer to bat_priv->mesh_obj (happens for untagged vlan and can cause some kernfs warnings)
v3:
- rebased on top of current master to fix conflicts with newest patches
v2:
- rebased on top of current master to fix conflicts with newest patches
net/batman-adv/sysfs.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
Applied in revision bebf178.
Thanks, Marek
On Monday, June 13, 2016 07:41:29 Sven Eckelmann wrote:
The description given in the commit is misleading and is at least not true for sysfs. The sysfs file structure should instead be deleted immediately on rtnl notifications and be underlying objects should be removed later when all kobject_put were done for the object.
This reverts commit 6b0485c758be ("batman-adv: Fix hardif remove/add race")
Applied in revision b58db7b.
@Andrew: Sven's patchset aims to fix the problem you encountered in a better way while also addressing other issues. Can you give it a try in your environment to confirm it fixes your problem too ?
Thanks, Marek
b.a.t.m.a.n@lists.open-mesh.org