From: Arnd Bergmann <arnd(a)arndb.de>
All file_operations should get a .llseek
operation so we can make nonseekable_open
the default for future file operations
without a .llseek pointer.
The three cases that we can automatically
detect are no_llseek, seq_lseek and
default_llseek. For cases where we can
automatically prove that the file offset
is always ignored, we use noop_llseek,
which maintains the current behavior of
not returning an error from a seek.
New drivers should normally not use
noop_llseek but instead use no_llseek and
call nonseekable_open at open time.
Existing drivers can be converted to do
the same when the maintainer knows for
certain that no user code relies on calling
seek on the device file.
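A minimal sketch of that recommended
pattern, with made-up names (foo_open,
foo_read, foo_fops) that are not taken
from any real driver:

#include <linux/fs.h>
#include <linux/module.h>

static int foo_open(struct inode *inode, struct file *file)
{
	/* declare the file nonseekable right at open time */
	return nonseekable_open(inode, file);
}

static ssize_t foo_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	return 0; /* f_pos is never touched */
}

static const struct file_operations foo_fops = {
	.owner  = THIS_MODULE,
	.open   = foo_open,
	.read   = foo_read,
	.llseek = no_llseek, /* lseek() fails with -ESPIPE */
};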
The generated code is often incorrectly
indented and right now contains comments that
clarify for each added line why a specific
variant was chosen. In the version that gets
submitted upstream, the comments will be gone
and I will manually fix the indentation,
because there does not seem to be a way to
do that using coccinelle.
Some amount of new code is currently sitting
in linux-next that should get the same
modifications, which I will do at the end of
the merge window.
Many thanks to Julia Lawall for helping me
learn to write a semantic patch that does
all this.
===== begin semantic patch =====
// This adds an llseek= method to all file operations,
// as a preparation for making no_llseek the default.
//
// The rules are
// - use no_llseek explicitly if we do nonseekable_open
// - use seq_lseek for sequential files
// - use default_llseek if we know we access f_pos
// - use noop_llseek if we know we don't access f_pos,
// but we still want to allow users to call lseek
//
@ open1 exists @
identifier nested_open;
@@
nested_open(...)
{
<+...
nonseekable_open(...)
...+>
}
@ open exists@
identifier open_f;
identifier i, f;
identifier open1.nested_open;
@@
int open_f(struct inode *i, struct file *f)
{
<+...
(
nonseekable_open(...)
|
nested_open(...)
)
...+>
}
@ read disable optional_qualifier exists @
identifier read_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
expression E;
identifier func;
@@
ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
{
<+...
(
*off = E
|
*off += E
|
func(..., off, ...)
|
E = *off
)
...+>
}
@ read_no_fpos disable optional_qualifier exists @
identifier read_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
@@
ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
{
... when != off
}
@ write @
identifier write_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
expression E;
identifier func;
@@
ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
{
<+...
(
*off = E
|
*off += E
|
func(..., off, ...)
|
E = *off
)
...+>
}
@ write_no_fpos @
identifier write_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
@@
ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
{
... when != off
}
@ fops0 @
identifier fops;
@@
struct file_operations fops = {
...
};
@ has_llseek depends on fops0 @
identifier fops0.fops;
identifier llseek_f;
@@
struct file_operations fops = {
...
.llseek = llseek_f,
...
};
@ has_read depends on fops0 @
identifier fops0.fops;
identifier read_f;
@@
struct file_operations fops = {
...
.read = read_f,
...
};
@ has_write depends on fops0 @
identifier fops0.fops;
identifier write_f;
@@
struct file_operations fops = {
...
.write = write_f,
...
};
@ has_open depends on fops0 @
identifier fops0.fops;
identifier open_f;
@@
struct file_operations fops = {
...
.open = open_f,
...
};
// use no_llseek if we call nonseekable_open
////////////////////////////////////////////
@ nonseekable1 depends on !has_llseek && has_open @
identifier fops0.fops;
identifier nso ~= "nonseekable_open";
@@
struct file_operations fops = {
... .open = nso, ...
+.llseek = no_llseek, /* nonseekable */
};
@ nonseekable2 depends on !has_llseek @
identifier fops0.fops;
identifier open.open_f;
@@
struct file_operations fops = {
... .open = open_f, ...
+.llseek = no_llseek, /* open uses nonseekable */
};
// use seq_lseek for sequential files
/////////////////////////////////////
@ seq depends on !has_llseek @
identifier fops0.fops;
identifier sr ~= "seq_read";
@@
struct file_operations fops = {
... .read = sr, ...
+.llseek = seq_lseek, /* we have seq_read */
};
// use default_llseek if there is a readdir
///////////////////////////////////////////
@ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier readdir_e;
@@
// any other fop is used that changes pos
struct file_operations fops = {
... .readdir = readdir_e, ...
+.llseek = default_llseek, /* readdir is present */
};
// use default_llseek if at least one of read/write touches f_pos
/////////////////////////////////////////////////////////////////
@ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read.read_f;
@@
// read fops use offset
struct file_operations fops = {
... .read = read_f, ...
+.llseek = default_llseek, /* read accesses f_pos */
};
@ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier write.write_f;
@@
// write fops use offset
struct file_operations fops = {
... .write = write_f, ...
+ .llseek = default_llseek, /* write accesses f_pos */
};
// Use noop_llseek if neither read nor write accesses f_pos
///////////////////////////////////////////////////////////
@ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read_no_fpos.read_f;
identifier write_no_fpos.write_f;
@@
// neither read nor write fops use the offset
struct file_operations fops = {
...
.write = write_f,
.read = read_f,
...
+.llseek = noop_llseek, /* read and write both use no f_pos */
};
@ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier write_no_fpos.write_f;
@@
struct file_operations fops = {
... .write = write_f, ...
+.llseek = noop_llseek, /* write uses no f_pos */
};
@ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read_no_fpos.read_f;
@@
struct file_operations fops = {
... .read = read_f, ...
+.llseek = noop_llseek, /* read uses no f_pos */
};
@ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
@@
struct file_operations fops = {
...
+.llseek = noop_llseek, /* no read or write fn */
};
===== End semantic patch =====
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Cc: Julia Lawall <julia(a)diku.dk>
Cc: Christoph Hellwig <hch(a)infradead.org>
Signed-off-by: Sven Eckelmann <sven.eckelmann(a)gmx.de>
---
batman-adv/bat_debugfs.c | 1 +
batman-adv/icmp_socket.c | 1 +
2 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/batman-adv/bat_debugfs.c b/batman-adv/bat_debugfs.c
index c73ce4a..0b35616 100644
--- a/batman-adv/bat_debugfs.c
+++ b/batman-adv/bat_debugfs.c
@@ -177,6 +177,7 @@ static const struct file_operations log_fops = {
.release = log_release,
.read = log_read,
.poll = log_poll,
+ .llseek = noop_llseek,/* read uses no f_pos */
};
static int debug_log_setup(struct bat_priv *bat_priv)
diff --git a/batman-adv/icmp_socket.c b/batman-adv/icmp_socket.c
index 85c047b..bc5e25a 100644
--- a/batman-adv/icmp_socket.c
+++ b/batman-adv/icmp_socket.c
@@ -285,6 +285,7 @@ static const struct file_operations fops = {
.read = bat_socket_read,
.write = bat_socket_write,
.poll = bat_socket_poll,
+ .llseek = noop_llseek,/* read and write both use no f_pos */
};
int bat_socket_setup(struct bat_priv *bat_priv)
--
1.7.2.3
synchronize_rcu() and synchronize_net() only wait for the rcu grace
period to elapse; callbacks that were registered with call_rcu in that
time may not have finished yet. As a result, the module could be
unloaded during the execution of the RCU callbacks.
rcu_barrier() [1] will now wait for all outstanding RCU callbacks to
finish before continuing.
[1] Documentation/RCU/rcubarrier.txt
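A minimal sketch of the general pattern, with made-up names (my_obj,
my_free_rcu, my_exit); it only illustrates why rcu_barrier() has to run
before the module text goes away:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct rcu_head rcu;
	/* ... payload ... */
};

static void my_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct my_obj, rcu));
}

static void my_obj_del(struct my_obj *obj)
{
	/* free after the grace period; the callback runs later */
	call_rcu(&obj->rcu, my_free_rcu);
}

static void __exit my_exit(void)
{
	/* synchronize_rcu() would only wait for the grace period;
	 * rcu_barrier() also waits until every pending my_free_rcu()
	 * callback has run, so none can fire after the unload. */
	rcu_barrier();
}
module_exit(my_exit);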
Signed-off-by: Sven Eckelmann <sven.eckelmann(a)gmx.de>
---
batman-adv/main.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/batman-adv/main.c b/batman-adv/main.c
index e8acb46..a067f75 100644
--- a/batman-adv/main.c
+++ b/batman-adv/main.c
@@ -74,7 +74,7 @@ static void __exit batman_exit(void)
destroy_workqueue(bat_event_workqueue);
bat_event_workqueue = NULL;
- synchronize_net();
+ rcu_barrier();
}
int mesh_init(struct net_device *soft_iface)
--
1.7.2.3
Hi,
I tried to check through the code and identify problems not yet mentioned by
Paul E. McKenney. They were found by reading through
Documentation/RCU/checklist.txt and related documents in the folder.
It doesn't address the reference counting problem for gw_nodes and interfaces.
Those leaks happen in gw_election, get_batman_if_by_netdev and
get_active_batman_if. We must increase the refcnt (using atomic_inc) inside the
rcu_read_lock()..rcu_read_unlock() section, before we attach to the structure
that "leaks" out of it. When another function later removes it from its usage
context (primary_if, usage on the stack, ...), it must atomic_dec_and_test the
refcnt. If that decrements it to zero, we can issue the call_rcu to the freeing
function. So the "put" of those structures is not allowed inside an
rcu_read_lock. As said before, the "hold" must always be called inside an
rcu_read_lock.
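A minimal sketch of this hold/put pattern, with hypothetical names
(my_if, my_if_hold, my_if_put, my_if_free_rcu); the real batman-adv
structures and helpers look different:

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_if {
	atomic_t refcnt;
	struct rcu_head rcu;
	/* ... */
};

static void my_if_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct my_if, rcu));
}

/* must be called inside rcu_read_lock(), while the object is still
 * reachable through the RCU-protected data structure */
static void my_if_hold(struct my_if *iface)
{
	atomic_inc(&iface->refcnt);
}

/* drop a reference; free via call_rcu once the last one is gone */
static void my_if_put(struct my_if *iface)
{
	if (atomic_dec_and_test(&iface->refcnt))
		call_rcu(&iface->rcu, my_if_free_rcu);
}

The caller takes the reference with my_if_hold() while still under
rcu_read_lock(), so the object cannot be freed underneath it, and drops
it with my_if_put() once the pointer is no longer stored anywhere.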
Best regards,
Sven
Hi folks,
recently, I came across a nasty issue which hasn't been solved yet. The
problem begins to show up when you try to connect multiple batman-adv mesh
node to the same LAN network. If batman-adv is bridged into the LAN and the
nodes have a decent connection to each other you are about to create an
ethernet loop which will take out your entire network.
A simple visualization of the loop:
node1 <-- LAN --> node2
  |                 |
 wifi <-- mesh --> wifi
Let's assume a packet from the LAN arrives at node1 which then floods the mesh
with that new packet. Node2 receives the packet via the mesh and forwards it
to the LAN where node1 receives it ...
Without the LAN connection this would not happen, because batman-adv provides
flood/loop protection inside the batman header. But as soon as the packet gets
bridged, this information is stripped from it, and every batman node connected
to the LAN will think: Hey, it is a new packet!
A common solution to avoid bridge loops is to deploy a protocol like STP or one
of its derivatives. STP would detect the loop and close ports to avoid it.
Running STP over the mesh is not really what we want, as STP has no clue about
the link qualities - and who wants to run a spanning tree over lossy links?
So, batman-adv needs its own mechanism to detect other batman nodes connected
to the same LAN and then close the appropriate ports. As a followup to this
mail I propose a patch which does exactly that. It will detect OGMs that come
in via the batX interface and interpret them as "port announcements".
Internally, it keeps a list of all LAN neighbors and selects the one with the
smallest MAC address as gateway to the LAN. All traffic that should go to the
LAN is forwarded to this node. Traffic from the LAN is simply dropped - only
the node with the smallest MAC address will forward it to the mesh.
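A rough sketch of how such a selection could look, assuming a
hypothetical lan_neigh list; the actual patch may implement this
differently:

#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/string.h>

struct lan_neigh {
	struct list_head list;
	u8 addr[ETH_ALEN];
};

/* pick the LAN neighbor with the numerically smallest MAC address;
 * only that node forwards traffic between the LAN and the mesh */
static struct lan_neigh *lan_neigh_select(struct list_head *neigh_list)
{
	struct lan_neigh *neigh, *best = NULL;

	list_for_each_entry(neigh, neigh_list, list) {
		if (!best || memcmp(neigh->addr, best->addr, ETH_ALEN) < 0)
			best = neigh;
	}

	return best;
}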
Simple steps to see it in action:
* add your wifi interface
-> batctl if add wlan0
* create a bridge for bat0 and your lan
-> brctl addbr br-lan
-> brctl addif br-lan eth0
-> brctl addif br-lan bat0
* activate batman on the lan
-> batctl if add br-lan
The patch can also deal with vlans on top of batX and offers a list of LAN
neighbors via debugfs (batctl support is yet to come).
Feedback is welcome! :-)
Regards,
Marek
Hi,
things that changed since the last patch:
- add support to fragment packets on every node,
  if the outgoing iface mtu is smaller than the packet
- add support to defragment packets on every node,
  if the outgoing iface mtu is >= the original size
- rename some functions
With this patch every node defragments packets if it can; a rough sketch of
the fragment/defragment decision follows below. Do you think it is better to
add an option to enable/disable this?
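This is only an illustration of the two checks described above, with
hypothetical helper names (frag_needed, defrag_possible); the patch
itself may structure the code differently:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* split the packet when it does not fit the outgoing interface */
static bool frag_needed(const struct sk_buff *skb,
			const struct net_device *out_dev)
{
	return skb->len > out_dev->mtu;
}

/* merge the fragments again once the outgoing MTU is large enough
 * to carry the original, unfragmented packet */
static bool defrag_possible(unsigned int orig_size,
			    const struct net_device *out_dev)
{
	return out_dev->mtu >= orig_size;
}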
regards,
Andreas
checkpatch now detects the start of a comment and warns about the usage of
multiple spaces at the beginning of a line. We have to replace the leading
' ' in multi-line comments with ' * ' to fix it.
Checkpatch also wants a comment after the definition of a spinlock_t which
describes what it protects. It is currently not possible to add that comment
before the actual struct which includes the spinlock.
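A minimal illustration of both styles checkpatch asks for, using a
made-up structure (foo_queue) purely as an example:

#include <linux/list.h>
#include <linux/spinlock.h>

/* a multi-line comment now starts every continuation line
 * with " * " instead of plain spaces */
struct foo_queue {
	struct list_head queue;
	spinlock_t lock; /* protects queue */
};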
Signed-off-by: Sven Eckelmann <sven.eckelmann(a)gmx.de>
---
batman-adv/packet.h | 2 +-
batman-adv/types.h | 21 +++++++++++----------
batman-adv/vis.c | 16 ++++++++--------
3 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/batman-adv/packet.h b/batman-adv/packet.h
index 87e652e..b49fdf7 100644
--- a/batman-adv/packet.h
+++ b/batman-adv/packet.h
@@ -81,7 +81,7 @@ struct icmp_packet {
#define BAT_RR_LEN 16
/* icmp_packet_rr must start with all fields from imcp_packet
- as this is assumed by code that handles ICMP packets */
+ * as this is assumed by code that handles ICMP packets */
struct icmp_packet_rr {
uint8_t packet_type;
uint8_t version; /* batman version field */
diff --git a/batman-adv/types.h b/batman-adv/types.h
index 1e736fb..e7b53a4 100644
--- a/batman-adv/types.h
+++ b/batman-adv/types.h
@@ -118,6 +118,7 @@ struct neigh_node {
struct batman_if *if_incoming;
};
+
struct bat_priv {
atomic_t mesh_state;
struct net_device_stats stats;
@@ -145,14 +146,14 @@ struct bat_priv {
struct hashtable_t *hna_local_hash;
struct hashtable_t *hna_global_hash;
struct hashtable_t *vis_hash;
- spinlock_t orig_hash_lock;
- spinlock_t forw_bat_list_lock;
- spinlock_t forw_bcast_list_lock;
- spinlock_t hna_lhash_lock;
- spinlock_t hna_ghash_lock;
- spinlock_t gw_list_lock;
- spinlock_t vis_hash_lock;
- spinlock_t vis_list_lock;
+ spinlock_t orig_hash_lock; /* protects orig_hash */
+ spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
+ spinlock_t forw_bcast_list_lock; /* protects */
+ spinlock_t hna_lhash_lock; /* protects hna_local_hash */
+ spinlock_t hna_ghash_lock; /* protects hna_global_hash */
+ spinlock_t gw_list_lock; /* protects gw_list */
+ spinlock_t vis_hash_lock; /* protects vis_hash */
+ spinlock_t vis_list_lock; /* protects vis_info::recv_list */
int16_t num_local_hna;
atomic_t hna_local_changed;
struct delayed_work hna_work;
@@ -166,7 +167,7 @@ struct socket_client {
struct list_head queue_list;
unsigned int queue_len;
unsigned char index;
- spinlock_t lock;
+ spinlock_t lock; /* protects queue_list, queue_len, index */
wait_queue_head_t queue_wait;
struct bat_priv *bat_priv;
};
@@ -218,7 +219,7 @@ struct debug_log {
char log_buff[LOG_BUF_LEN];
unsigned long log_start;
unsigned long log_end;
- spinlock_t lock;
+ spinlock_t lock; /* protects log_buff, log_start and log_end */
wait_queue_head_t queue_wait;
};
diff --git a/batman-adv/vis.c b/batman-adv/vis.c
index 6de5c76..ddfdcd8 100644
--- a/batman-adv/vis.c
+++ b/batman-adv/vis.c
@@ -34,14 +34,14 @@
#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u)))
/* Checks if a sequence number x is a predecessor/successor of y.
- they handle overflows/underflows and can correctly check for a
- predecessor/successor unless the variable sequence number has grown by
- more then 2**(bitwidth(x)-1)-1.
- This means that for a uint8_t with the maximum value 255, it would think:
- * when adding nothing - it is neither a predecessor nor a successor
- * before adding more than 127 to the starting value - it is a predecessor,
- * when adding 128 - it is neither a predecessor nor a successor,
- * after adding more than 127 to the starting value - it is a successor */
+ * they handle overflows/underflows and can correctly check for a
+ * predecessor/successor unless the variable sequence number has grown by
+ * more then 2**(bitwidth(x)-1)-1.
+ * This means that for a uint8_t with the maximum value 255, it would think:
+ * - when adding nothing - it is neither a predecessor nor a successor
+ * - before adding more than 127 to the starting value - it is a predecessor,
+ * - when adding 128 - it is neither a predecessor nor a successor,
+ * - after adding more than 127 to the starting value - it is a successor */
#define seq_before(x, y) ({typeof(x) _dummy = (x - y); \
_dummy > smallest_signed_int(_dummy); })
#define seq_after(x, y) seq_before(y, x)
--
1.7.1
Hi,
here are some smaller cleanup/bugfix patches for 2.6.37. All patches this
patchset depends on are already part of your staging-next tree.
These are pure cleanup patches; no new feature is added.
thanks,
Sven
Linus Lüssing (1):
Staging: batman-adv: Always synchronize rcu's on module shutdown
Marek Lindner (1):
Staging: batman-adv: removing redundant assignment of skb->dev
Sven Eckelmann (2):
Staging: batman-adv: Remove currently unused gw_* datastructures
Staging: batman-adv: Add rcu TODO
Vasiliy Kulikov (1):
Staging: batman-adv: check kmalloc() return value
drivers/staging/batman-adv/TODO | 5 +++++
drivers/staging/batman-adv/main.c | 7 ++-----
drivers/staging/batman-adv/routing.c | 8 ++++++--
drivers/staging/batman-adv/soft-interface.c | 5 +----
drivers/staging/batman-adv/types.h | 3 ---
drivers/staging/batman-adv/unicast.c | 8 ++++++--
drivers/staging/batman-adv/unicast.h | 2 +-
7 files changed, 21 insertions(+), 17 deletions(-)
During the module shutdown procedure in batman_exit(), an rcu callback is
being scheduled (batman_exit -> hardif_remove_interfaces ->
hardif_remove_interface -> call_rcu). However, when the kernel unloads
the module, the rcu callback might not have been executed yet, resulting
in an "unable to handle kernel paging request" in __rcu_process_callback
afterwards, causing the kernel to freeze.
Therefore, we should always flush all rcu callback functions scheduled
during the shutdown procedure.
Signed-off-by: Linus Lüssing <linus.luessing(a)web.de>
---
main.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/main.c b/main.c
index 209a46b..e8acb46 100644
--- a/main.c
+++ b/main.c
@@ -73,6 +73,8 @@ static void __exit batman_exit(void)
flush_workqueue(bat_event_workqueue);
destroy_workqueue(bat_event_workqueue);
bat_event_workqueue = NULL;
+
+ synchronize_net();
}
int mesh_init(struct net_device *soft_iface)
@@ -135,9 +137,6 @@ void mesh_free(struct net_device *soft_iface)
hna_local_free(bat_priv);
hna_global_free(bat_priv);
- synchronize_net();
-
- synchronize_rcu();
atomic_set(&bat_priv->mesh_state, MESH_INACTIVE);
}
--
1.7.1