So far on purging broadcast and ogm queues we temporarily give up the
spin lock of these queues to be able to cancel any scheduled forwarding
work. However this is unsafe and can lead to a general protection error
in batadv_purge_outstanding_packets().
With this patch we split the queue purging into two steps: First
removing forward packets from those queues and signaling the
cancelation. Secondly, we are actively canceling any scheduled
forwarding, wait for any running forwarding to finish and only free a
forw_packet afterwards.
Signed-off-by: Linus Lüssing <linus.luessing(a)web.de>
---
Fixes issue #168
send.c | 117 ++++++++++++++++++++++++++++++++++++++-------------------------
types.h | 1 +
2 files changed, 71 insertions(+), 47 deletions(-)
diff --git a/send.c b/send.c
index 0a0bb45..f93476b 100644
--- a/send.c
+++ b/send.c
@@ -245,6 +245,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
bat_priv = netdev_priv(soft_iface);
spin_lock_bh(&bat_priv->forw_bcast_list_lock);
+ if (hlist_unhashed(&forw_packet->list)) {
+ spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+ return;
+ }
hlist_del(&forw_packet->list);
spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
@@ -293,6 +297,10 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work)
delayed_work);
bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
spin_lock_bh(&bat_priv->forw_bat_list_lock);
+ if (hlist_unhashed(&forw_packet->list)) {
+ spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+ return;
+ }
hlist_del(&forw_packet->list);
spin_unlock_bh(&bat_priv->forw_bat_list_lock);
@@ -316,13 +324,68 @@ out:
batadv_forw_packet_free(forw_packet);
}
+/**
+ * batadv_cancel_packets - Cancels a list of forward packets
+ * @forw_list: The to be canceled forward packets
+ * @canceled_list: The backup list.
+ *
+ * This canceles any scheduled forwarding packet tasks in the provided
+ * forw_list. The packets are being moved from the forw_list to the
+ * canceled_list afterwards to unhash the forward packet list pointer,
+ * allowing any already running task to notice the cancelation.
+ */
+static void batadv_cancel_packets(struct hlist_head *forw_list,
+ struct hlist_head *canceled_list,
+ const struct batadv_hard_iface *hard_iface)
+{
+ struct batadv_forw_packet *forw_packet;
+ struct hlist_node *tmp_node, *safe_tmp_node;
+
+ hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
+ forw_list, list) {
+ /* if purge_outstanding_packets() was called with an argument
+ * we delete only packets belonging to the given interface
+ */
+ if ((hard_iface) &&
+ (forw_packet->if_incoming != hard_iface))
+ continue;
+
+ hlist_del_init(&forw_packet->list);
+ hlist_add_head(&forw_packet->canceled_list, canceled_list);
+ }
+}
+
+/**
+ * batadv_canceled_packets_free - Frees canceled forward packets
+ * @head: A list of to be freed forw_packets
+ *
+ * This function canceles the scheduling of any packet in the provided list,
+ * waits for any possibly running packet forwarding thread to finish and
+ * finally, safely frees this forward packet.
+ *
+ * This function might sleep.
+ */
+static void batadv_canceled_packets_free(struct hlist_head *head)
+{
+ struct batadv_forw_packet *forw_packet;
+ struct hlist_node *tmp_node, *safe_tmp_node;
+
+ hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, head,
+ canceled_list) {
+ cancel_delayed_work_sync(&forw_packet->delayed_work);
+
+ hlist_del(&forw_packet->canceled_list);
+ batadv_forw_packet_free(forw_packet);
+ }
+}
+
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface)
{
- struct batadv_forw_packet *forw_packet;
- struct hlist_node *tmp_node, *safe_tmp_node;
- bool pending;
+ struct hlist_head head;
+
+ INIT_HLIST_HEAD(&head);
if (hard_iface)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -334,53 +397,13 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
/* free bcast list */
spin_lock_bh(&bat_priv->forw_bcast_list_lock);
- hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
- &bat_priv->forw_bcast_list, list) {
- /* if purge_outstanding_packets() was called with an argument
- * we delete only packets belonging to the given interface
- */
- if ((hard_iface) &&
- (forw_packet->if_incoming != hard_iface))
- continue;
-
- spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
-
- /* batadv_send_outstanding_bcast_packet() will lock the list to
- * delete the item from the list
- */
- pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
- spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-
- if (pending) {
- hlist_del(&forw_packet->list);
- batadv_forw_packet_free(forw_packet);
- }
- }
+ batadv_cancel_packets(&bat_priv->forw_bcast_list, &head, hard_iface);
spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
/* free batman packet list */
spin_lock_bh(&bat_priv->forw_bat_list_lock);
- hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
- &bat_priv->forw_bat_list, list) {
- /* if purge_outstanding_packets() was called with an argument
- * we delete only packets belonging to the given interface
- */
- if ((hard_iface) &&
- (forw_packet->if_incoming != hard_iface))
- continue;
-
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
- /* send_outstanding_bat_packet() will lock the list to
- * delete the item from the list
- */
- pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
-
- if (pending) {
- hlist_del(&forw_packet->list);
- batadv_forw_packet_free(forw_packet);
- }
- }
+ batadv_cancel_packets(&bat_priv->forw_bat_list, &head, hard_iface);
spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ batadv_canceled_packets_free(&head);
}
diff --git a/types.h b/types.h
index aba8364..f62a35f 100644
--- a/types.h
+++ b/types.h
@@ -853,6 +853,7 @@ struct batadv_skb_cb {
*/
struct batadv_forw_packet {
struct hlist_node list;
+ struct hlist_node canceled_list;
unsigned long send_time;
uint8_t own;
struct sk_buff *skb;
--
1.7.10.4
Scenario:
I have recently moved 13 nodes previously using wds to batman-adv.
One problem that i am encountering is the gateway selection and its
actually defectiveness.
This problem has be seen when the client nodes are set on class 20.
The node clients in fact select the gateways that advertise better link
quality usually above 200 but at the same time some clients end up
connecting to a gateway that is either too slow or unusable either due
to routing or other factors such as distance and even clear visibility.
In one case the node connects to something that defies logic.
I thought about ways to work around this and came up with an idea that
may help which means the creation of 2 new classes.
- Preferred gateway fall-back
- Preferred fixed gateway
For Preferred gateway fall-back:
consider the gateway's advertised quality towards the gateway and stick
with the selection until the gateway disappears returning to it soon as
it returns regardless of any other data.
- Preferred fixed gateway
Manual gateway selection by the node owner or admin. (wds type)
If these classes cannot be created; maybe a plugin of some sort could
be. could be called "gordon".
I understand that this idea may sound useless the same way blocking
originators may be for someone but in some scenarios just like blocking
originators it look to me very valid.
My current problem is that the link quality seems to go up and down like
a roller coaster in a specific order which causes the clients to switch
constantly sometimes many times per hour.
While this does not seem to be an issue for most client nodes; for 2 of
them it means no internet access at all.
The solution for now seems to be having these 2 nodes forced to use 1
specific gateway.
I am quite open to any ideas that may help resolve this issue that has
been casing some "uninformed people" to become headaches.
--
Redes wireless
http://wirelesspt.net
On shutdown a race condition where we access a just freed global TT hash
might occure:
batadv_mesh_free()->batadv_originator_free() schedules the
batadv_orig_node_free_rcu().
Before batadv_orig_node_free_rcu() is executed (which happens on the
rcu_barrier() call in batadv_exit() the latest),
batadv_mesh_free()->batadv_tt_free()->batadv_tt_global_table_free()->
batadv_hash_destroy(hash)->kfree(hash)
is called, freeing the global tt hash.
When batadv_orig_node_free_rcu()->batadv_tt_global_del_orig() now gets
executed it tries to access this just freed global tt hash, causing a
kernel panic.
This patch tries to fix this by waiting for any just scheduled
batadv_orig_node_free_rcu() to finish via an extra rcu_barrier() call
before freeing the global TT hash.
Signed-off-by: Linus Lüssing <linus.luessing(a)web.de>
---
Ref: #169
main.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/main.c b/main.c
index 62b1f89..0afc171 100644
--- a/main.c
+++ b/main.c
@@ -164,6 +164,11 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_gw_node_purge(bat_priv);
batadv_originator_free(bat_priv);
+
+ /* Wait for any batadv_orig_node_free_rcu() to finish,
+ * they access the soon to be freed global TT hash */
+ rcu_barrier();
+
batadv_nc_free(bat_priv);
batadv_tt_free(bat_priv);
--
1.7.10.4
Only one neigh_node per orig_node should match a given
neighbor address, therefore, if more than one matching
neigh_node is found, a WARNING has to be triggered to let
the user know that something is wrong in the originator
state instead of silently skipping the error.
Signed-off-by: Antonio Quartulli <ordex(a)autistici.org>
---
bat_iv_ogm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bat_iv_ogm.c b/bat_iv_ogm.c
index 38183dc..9fbf588 100644
--- a/bat_iv_ogm.c
+++ b/bat_iv_ogm.c
@@ -669,7 +669,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
tmp_neigh_node->if_incoming == if_incoming &&
atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
- if (neigh_node)
+ if (WARN(neigh_node, "too many matching neigh_nodes")
batadv_neigh_node_free_ref(neigh_node);
neigh_node = tmp_neigh_node;
continue;
--
1.8.1.5
From: Antonio Quartulli <antonio(a)open-mesh.com>
Compatibility version is checked upon packet reception
before calling any handler. For this reason it does need to
be checked once more in the handler itself.
Signed-off-by: Antonio Quartulli <antonio(a)open-mesh.com>
---
bat_iv_ogm.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/bat_iv_ogm.c b/bat_iv_ogm.c
index 6d62e29..38183dc 100644
--- a/bat_iv_ogm.c
+++ b/bat_iv_ogm.c
@@ -1054,13 +1054,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
}
rcu_read_unlock();
- if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) {
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Drop packet: incompatible batman version (%i)\n",
- batadv_ogm_packet->header.version);
- return;
- }
-
if (is_my_addr) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: received my own broadcast (sender: %pM)\n",
--
1.8.1.5
From: Antonio Quartulli <antonio(a)open-mesh.com>
the sequence number is not stored in struct neigh_node,
therefore there is no need to pass such value to the
neigh_node creation procedure.
At the moment the value is only used by a debug message, but
given the fact that the seqno is not related to the neighbor
object, it is better to print it elsewhere.
Signed-off-by: Antonio Quartulli <antonio(a)open-mesh.com>
---
do we really want to pass the seqno over every function just to print it at the
end? If this value is really important (debug? don't know) it should be printed
in a proper place, e.g. in the function that triggered the creation and that
possibly has the seqno for other purposes...
Otherwise we would not be able to create a neigh_nod unless we have a seqno to
pass.
Cheers,
bat_iv_ogm.c | 11 ++++-------
originator.c | 5 ++---
originator.h | 2 +-
3 files changed, 7 insertions(+), 11 deletions(-)
diff --git a/bat_iv_ogm.c b/bat_iv_ogm.c
index 071f288..da239c5 100644
--- a/bat_iv_ogm.c
+++ b/bat_iv_ogm.c
@@ -33,12 +33,11 @@ static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh, __be32 seqno)
+ struct batadv_orig_node *orig_neigh)
{
struct batadv_neigh_node *neigh_node;
- neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr,
- ntohl(seqno));
+ neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr);
if (!neigh_node)
goto out;
@@ -696,8 +695,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
ethhdr->h_source,
- orig_node, orig_tmp,
- batadv_ogm_packet->seqno);
+ orig_node, orig_tmp);
batadv_orig_node_free_ref(orig_tmp);
if (!neigh_node)
@@ -829,8 +827,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
orig_neigh_node->orig,
orig_neigh_node,
- orig_neigh_node,
- batadv_ogm_packet->seqno);
+ orig_neigh_node);
if (!neigh_node)
goto out;
diff --git a/originator.c b/originator.c
index 2f34525..ddd417c 100644
--- a/originator.c
+++ b/originator.c
@@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node)
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr, uint32_t seqno)
+ const uint8_t *neigh_addr)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_neigh_node *neigh_node;
@@ -110,8 +110,7 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
atomic_set(&neigh_node->refcount, 2);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Creating new neighbor %pM, initial seqno %d\n",
- neigh_addr, seqno);
+ "Creating new neighbor %pM\n", neigh_addr);
out:
return neigh_node;
diff --git a/originator.h b/originator.h
index 7df48fa..476c623 100644
--- a/originator.h
+++ b/originator.h
@@ -30,7 +30,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr, uint32_t seqno);
+ const uint8_t *neigh_addr);
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node);
struct batadv_neigh_node *
batadv_orig_node_get_router(struct batadv_orig_node *orig_node);
--
1.8.1.5
tags 703540 + pending patch
thanks
On Wednesday 20 March 2013 17:51:54 you wrote:
> While building the package using our research compiler infrastructure we
> noticed conflicting types being used in the linked executable. This is due
> to _GNU_SOURCE being defined in linux/route.c and posix/unix_socket.c, but
> not in any other file. As a result, system headers expand to conflicting
> declarations. (This was at least noticed for the sendto function, but may
> extend to others.)
>
> Either all or no file should #define _GNU_SOURCE.
Please add information how to reproduce this the next time you are adding such
such a bug. Now I can just assume what you are writing is true (even when the
man page about sendto says otherwise). Not knowing how to reproduce it in the
best possible way just makes it harder for everyone to check the impact of the
problem.
I've forwarded it to the upstream maintainer and attached the change for
Debian.
Kind regards,
Sven