The following commit has been merged in the master branch: commit 3bad2f1c676581d01e7645eb03e9b27e28b0a92e Merge: b4b8cbf679c4866a523a35d1454884a31bd5d8dc 8c6657cb50cb037ff58b3f6a547c6569568f3527 Author: Linus Torvalds torvalds@linux-foundation.org Date: Wed Jul 5 13:13:32 2017 -0700
Merge branch 'work.misc-set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull misc user access cleanups from Al Viro: "The first pile is assorted getting rid of cargo-culted access_ok(), cargo-culted set_fs() and field-by-field copyouts.
The same description applies to a lot of stuff in other branches - this is just the stuff that didn't fit into a more specific topical branch"
* 'work.misc-set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: Switch flock copyin/copyout primitives to copy_{from,to}_user() fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be found fs/fcntl: f_setown, avoid undefined behaviour fs/fcntl: f_setown, allow returning error lpfc debugfs: get rid of pointless access_ok() adb: get rid of pointless access_ok() isdn: get rid of pointless access_ok() compat statfs: switch to copy_to_user() fs/locks: don't mess with the address limit in compat_fcntl64 nfsd_readlink(): switch to vfs_get_link() drbd: ->sendpage() never needed set_fs() fs/locks: pass kernel struct flock to fcntl_getlk/setlk fs: locks: Fix some troubles at kernel-doc comments
diff --combined drivers/block/drbd/drbd_main.c index 5fb99e06ebe4,da4944ea974e..e2ed28d45ce1 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@@ -128,7 -128,6 +128,7 @@@ mempool_t *drbd_request_mempool mempool_t *drbd_ee_mempool; mempool_t *drbd_md_io_page_pool; struct bio_set *drbd_md_io_bio_set; +struct bio_set *drbd_io_bio_set;
/* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@@ -1551,7 -1550,6 +1551,6 @@@ static int _drbd_send_page(struct drbd_ int offset, size_t size, unsigned msg_flags) { struct socket *socket = peer_device->connection->data.socket; - mm_segment_t oldfs = get_fs(); int len = size; int err = -EIO;
@@@ -1566,7 -1564,6 +1565,6 @@@
msg_flags |= MSG_NOSIGNAL; drbd_update_congested(peer_device->connection); - set_fs(KERNEL_DS); do { int sent;
@@@ -1586,7 -1583,6 +1584,6 @@@ len -= sent; offset += sent; } while (len > 0 /* THINK && device->cstate >= C_CONNECTED*/); - set_fs(oldfs); clear_bit(NET_CONGESTED, &peer_device->connection->flags);
if (len == 0) { @@@ -2099,8 -2095,6 +2096,8 @@@ static void drbd_destroy_mempools(void
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
+ if (drbd_io_bio_set) + bioset_free(drbd_io_bio_set); if (drbd_md_io_bio_set) bioset_free(drbd_md_io_bio_set); if (drbd_md_io_page_pool) @@@ -2118,7 -2112,6 +2115,7 @@@ if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache);
+ drbd_io_bio_set = NULL; drbd_md_io_bio_set = NULL; drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; @@@ -2146,7 -2139,6 +2143,7 @@@ static int drbd_create_mempools(void drbd_pp_pool = NULL; drbd_md_io_page_pool = NULL; drbd_md_io_bio_set = NULL; + drbd_io_bio_set = NULL;
/* caches */ drbd_request_cache = kmem_cache_create( @@@ -2170,13 -2162,7 +2167,13 @@@ goto Enomem;
/* mempools */ - drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER); + if (drbd_io_bio_set == NULL) + goto Enomem; + + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, + BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER); if (drbd_md_io_bio_set == NULL) goto Enomem;
@@@ -2850,6 -2836,7 +2847,6 @@@ enum drbd_ret_code drbd_create_device(s /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL); diff --combined drivers/isdn/i4l/isdn_ppp.c index 88e5a025cea7,487478bafaf5..6c44609fd83a --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@@ -795,9 -795,6 +795,6 @@@ isdn_ppp_read(int min, struct file *fil if (!(is->state & IPPP_OPEN)) return 0;
- if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - spin_lock_irqsave(&is->buflock, flags); b = is->first->next; save_buf = b->buf; @@@ -1312,7 -1309,7 +1309,7 @@@ isdn_ppp_xmit(struct sk_buff *skb, stru /* check if we should pass this packet * the filter instructions are constructed assuming * a four-byte PPP header on each packet */ - *skb_push(skb, 4) = 1; /* indicate outbound */ + *(u8 *)skb_push(skb, 4) = 1; /* indicate outbound */
{ __be16 *p = (__be16 *)skb->data; @@@ -1509,7 -1506,7 +1506,7 @@@ int isdn_ppp_autodial_filter(struct sk_ * temporarily remove part of the fake header stuck on * earlier. */ - *skb_pull(skb, IPPP_MAX_HEADER - 4) = 1; /* indicate outbound */ + *(u8 *)skb_pull(skb, IPPP_MAX_HEADER - 4) = 1; /* indicate outbound */
{ __be16 *p = (__be16 *)skb->data; @@@ -2014,9 -2011,6 +2011,6 @@@ isdn_ppp_dev_ioctl_stats(int slot, stru struct ppp_stats t; isdn_net_local *lp = netdev_priv(dev);
- if (!access_ok(VERIFY_WRITE, res, sizeof(struct ppp_stats))) - return -EFAULT; - /* build a temporary stat struct and copy it to user space */
memset(&t, 0, sizeof(struct ppp_stats)); @@@ -2258,7 -2252,8 +2252,7 @@@ static void isdn_ppp_ccp_xmit_reset(str
/* Now stuff remaining bytes */ if (len) { - p = skb_put(skb, len); - memcpy(p, data, len); + skb_put_data(skb, data, len); }
/* skb is now ready for xmit */ @@@ -2363,7 -2358,7 +2357,7 @@@ static struct ippp_ccp_reset_state *isd id); return NULL; } else { - rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_KERNEL); + rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_ATOMIC); if (!rs) return NULL; rs->state = CCPResetIdle; diff --combined drivers/isdn/isdnloop/isdnloop.c index 7ac7badb8f55,32cb0cbd7217..6ffd13466b8c --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@@ -479,7 -479,7 +479,7 @@@ isdnloop_fake(isdnloop_card *card, cha } if (ch >= 0) sprintf(skb_put(skb, 3), "%02d;", ch); - memcpy(skb_put(skb, strlen(s)), s, strlen(s)); + skb_put_data(skb, s, strlen(s)); skb_queue_tail(&card->dqueue, skb); return 0; } @@@ -1142,8 -1142,6 +1142,6 @@@ isdnloop_command(isdn_ctrl *c, isdnloop case ISDNLOOP_IOCTL_DEBUGVAR: return (ulong) card; case ISDNLOOP_IOCTL_STARTUP: - if (!access_ok(VERIFY_READ, (void *) a, sizeof(isdnloop_sdef))) - return -EFAULT; return isdnloop_start(card, (isdnloop_sdef *) a); break; case ISDNLOOP_IOCTL_ADDCARD: diff --combined fs/fcntl.c index ed051f825bad,f525bc75c07d..b6bd89628025 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@@ -109,20 -109,34 +109,34 @@@ void __f_setown(struct file *filp, stru } EXPORT_SYMBOL(__f_setown);
- void f_setown(struct file *filp, unsigned long arg, int force) + int f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; - struct pid *pid; - int who = arg; + struct pid *pid = NULL; + int who = arg, ret = 0; + type = PIDTYPE_PID; if (who < 0) { + /* avoid overflow below */ + if (who == INT_MIN) + return -EINVAL; + type = PIDTYPE_PGID; who = -who; } + rcu_read_lock(); - pid = find_vpid(who); - __f_setown(filp, pid, type, force); + if (who) { + pid = find_vpid(who); + if (!pid) + ret = -ESRCH; + } + + if (!ret) + __f_setown(filp, pid, type, force); rcu_read_unlock(); + + return ret; } EXPORT_SYMBOL(f_setown);
@@@ -243,70 -257,11 +257,72 @@@ static int f_getowner_uids(struct file } #endif
+static bool rw_hint_valid(enum rw_hint hint) +{ + switch (hint) { + case RWF_WRITE_LIFE_NOT_SET: + case RWH_WRITE_LIFE_NONE: + case RWH_WRITE_LIFE_SHORT: + case RWH_WRITE_LIFE_MEDIUM: + case RWH_WRITE_LIFE_LONG: + case RWH_WRITE_LIFE_EXTREME: + return true; + default: + return false; + } +} + +static long fcntl_rw_hint(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file_inode(file); + u64 *argp = (u64 __user *)arg; + enum rw_hint hint; + u64 h; + + switch (cmd) { + case F_GET_FILE_RW_HINT: + h = file_write_hint(file); + if (copy_to_user(argp, &h, sizeof(*argp))) + return -EFAULT; + return 0; + case F_SET_FILE_RW_HINT: + if (copy_from_user(&h, argp, sizeof(h))) + return -EFAULT; + hint = (enum rw_hint) h; + if (!rw_hint_valid(hint)) + return -EINVAL; + + spin_lock(&file->f_lock); + file->f_write_hint = hint; + spin_unlock(&file->f_lock); + return 0; + case F_GET_RW_HINT: + h = inode->i_write_hint; + if (copy_to_user(argp, &h, sizeof(*argp))) + return -EFAULT; + return 0; + case F_SET_RW_HINT: + if (copy_from_user(&h, argp, sizeof(h))) + return -EFAULT; + hint = (enum rw_hint) h; + if (!rw_hint_valid(hint)) + return -EINVAL; + + inode_lock(inode); + inode->i_write_hint = hint; + inode_unlock(inode); + return 0; + default: + return -EINVAL; + } +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { + void __user *argp = (void __user *)arg; + struct flock flock; long err = -EINVAL;
switch (cmd) { @@@ -334,7 -289,11 +350,11 @@@ case F_OFD_GETLK: #endif case F_GETLK: - err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); + if (copy_from_user(&flock, argp, sizeof(flock))) + return -EFAULT; + err = fcntl_getlk(filp, cmd, &flock); + if (!err && copy_to_user(argp, &flock, sizeof(flock))) + return -EFAULT; break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ @@@ -344,7 -303,9 +364,9 @@@ /* Fallthrough */ case F_SETLK: case F_SETLKW: - err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); + if (copy_from_user(&flock, argp, sizeof(flock))) + return -EFAULT; + err = fcntl_setlk(fd, filp, cmd, &flock); break; case F_GETOWN: /* @@@ -358,8 -319,7 +380,7 @@@ force_successful_syscall_return(); break; case F_SETOWN: - f_setown(filp, arg, 1); - err = 0; + err = f_setown(filp, arg, 1); break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); @@@ -398,12 -358,6 +419,12 @@@ case F_GET_SEALS: err = shmem_fcntl(filp, cmd, arg); break; + case F_GET_RW_HINT: + case F_SET_RW_HINT: + case F_GET_FILE_RW_HINT: + case F_SET_FILE_RW_HINT: + err = fcntl_rw_hint(filp, cmd, arg); + break; default: break; } @@@ -450,7 -404,9 +471,9 @@@ out SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { + void __user *argp = (void __user *)arg; struct fd f = fdget_raw(fd); + struct flock64 flock; long err = -EBADF;
if (!f.file) @@@ -468,14 -424,21 +491,21 @@@ switch (cmd) { case F_GETLK64: case F_OFD_GETLK: - err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); + err = -EFAULT; + if (copy_from_user(&flock, argp, sizeof(flock))) + break; + err = fcntl_getlk64(f.file, cmd, &flock); + if (!err && copy_to_user(argp, &flock, sizeof(flock))) + err = -EFAULT; break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: - err = fcntl_setlk64(fd, f.file, cmd, - (struct flock64 __user *) arg); + err = -EFAULT; + if (copy_from_user(&flock, argp, sizeof(flock))) + break; + err = fcntl_setlk64(fd, f.file, cmd, &flock); break; default: err = do_fcntl(fd, cmd, arg, f.file); @@@ -489,57 -452,56 +519,56 @@@ out #endif
#ifdef CONFIG_COMPAT + /* careful - don't use anywhere else */ + #define copy_flock_fields(from, to) \ + (to).l_type = (from).l_type; \ + (to).l_whence = (from).l_whence; \ + (to).l_start = (from).l_start; \ + (to).l_len = (from).l_len; \ + (to).l_pid = (from).l_pid; + static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) { - if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || - __get_user(kfl->l_type, &ufl->l_type) || - __get_user(kfl->l_whence, &ufl->l_whence) || - __get_user(kfl->l_start, &ufl->l_start) || - __get_user(kfl->l_len, &ufl->l_len) || - __get_user(kfl->l_pid, &ufl->l_pid)) + struct compat_flock fl; + + if (copy_from_user(&fl, ufl, sizeof(struct compat_flock))) return -EFAULT; + copy_flock_fields(*kfl, fl); return 0; }
- static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) + static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) { - if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || - __put_user(kfl->l_type, &ufl->l_type) || - __put_user(kfl->l_whence, &ufl->l_whence) || - __put_user(kfl->l_start, &ufl->l_start) || - __put_user(kfl->l_len, &ufl->l_len) || - __put_user(kfl->l_pid, &ufl->l_pid)) + struct compat_flock64 fl; + + if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64))) return -EFAULT; + copy_flock_fields(*kfl, fl); return 0; }
- #ifndef HAVE_ARCH_GET_COMPAT_FLOCK64 - static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) + static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) { - if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || - __get_user(kfl->l_type, &ufl->l_type) || - __get_user(kfl->l_whence, &ufl->l_whence) || - __get_user(kfl->l_start, &ufl->l_start) || - __get_user(kfl->l_len, &ufl->l_len) || - __get_user(kfl->l_pid, &ufl->l_pid)) + struct compat_flock fl; + + memset(&fl, 0, sizeof(struct compat_flock)); + copy_flock_fields(fl, *kfl); + if (copy_to_user(ufl, &fl, sizeof(struct compat_flock))) return -EFAULT; return 0; } - #endif
- #ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64 static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) { - if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || - __put_user(kfl->l_type, &ufl->l_type) || - __put_user(kfl->l_whence, &ufl->l_whence) || - __put_user(kfl->l_start, &ufl->l_start) || - __put_user(kfl->l_len, &ufl->l_len) || - __put_user(kfl->l_pid, &ufl->l_pid)) + struct compat_flock64 fl; + + memset(&fl, 0, sizeof(struct compat_flock64)); + copy_flock_fields(fl, *kfl); + if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64))) return -EFAULT; return 0; } - #endif + #undef copy_flock_fields
static unsigned int convert_fcntl_cmd(unsigned int cmd) @@@ -556,76 -518,92 +585,92 @@@ return cmd; }
+ /* + * GETLK was successful and we need to return the data, but it needs to fit in + * the compat structure. + * l_start shouldn't be too big, unless the original start + end is greater than + * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return + * -EOVERFLOW in that case. l_len could be too big, in which case we just + * truncate it, and only allow the app to see that part of the conflicting lock + * that might make sense to it anyway + */ + static int fixup_compat_flock(struct flock *flock) + { + if (flock->l_start > COMPAT_OFF_T_MAX) + return -EOVERFLOW; + if (flock->l_len > COMPAT_OFF_T_MAX) + flock->l_len = COMPAT_OFF_T_MAX; + return 0; + } + COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { - mm_segment_t old_fs; - struct flock f; - long ret; - unsigned int conv_cmd; + struct fd f = fdget_raw(fd); + struct flock flock; + long err = -EBADF; + + if (!f.file) + return err; + + if (unlikely(f.file->f_mode & FMODE_PATH)) { + if (!check_fcntl_cmd(cmd)) + goto out_put; + } + + err = security_file_fcntl(f.file, cmd, arg); + if (err) + goto out_put;
switch (cmd) { case F_GETLK: + err = get_compat_flock(&flock, compat_ptr(arg)); + if (err) + break; + err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); + if (err) + break; + err = fixup_compat_flock(&flock); + if (err) + return err; + err = put_compat_flock(&flock, compat_ptr(arg)); + break; + case F_GETLK64: + case F_OFD_GETLK: + err = get_compat_flock64(&flock, compat_ptr(arg)); + if (err) + break; + err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); + if (err) + break; + err = fixup_compat_flock(&flock); + if (err) + return err; + err = put_compat_flock64(&flock, compat_ptr(arg)); + break; case F_SETLK: case F_SETLKW: - ret = get_compat_flock(&f, compat_ptr(arg)); - if (ret != 0) + err = get_compat_flock(&flock, compat_ptr(arg)); + if (err) break; - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_fcntl(fd, cmd, (unsigned long)&f); - set_fs(old_fs); - if (cmd == F_GETLK && ret == 0) { - /* GETLK was successful and we need to return the data... - * but it needs to fit in the compat structure. - * l_start shouldn't be too big, unless the original - * start + end is greater than COMPAT_OFF_T_MAX, in which - * case the app was asking for trouble, so we return - * -EOVERFLOW in that case. - * l_len could be too big, in which case we just truncate it, - * and only allow the app to see that part of the conflicting - * lock that might make sense to it anyway - */ - - if (f.l_start > COMPAT_OFF_T_MAX) - ret = -EOVERFLOW; - if (f.l_len > COMPAT_OFF_T_MAX) - f.l_len = COMPAT_OFF_T_MAX; - if (ret == 0) - ret = put_compat_flock(&f, compat_ptr(arg)); - } + err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; - - case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_OFD_GETLK: case F_OFD_SETLK: case F_OFD_SETLKW: - ret = get_compat_flock64(&f, compat_ptr(arg)); - if (ret != 0) + err = get_compat_flock64(&flock, compat_ptr(arg)); + if (err) break; - old_fs = get_fs(); - set_fs(KERNEL_DS); - conv_cmd = convert_fcntl_cmd(cmd); - ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); - set_fs(old_fs); - if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { - /* need to return lock information - see above for commentary */ - if (f.l_start > COMPAT_LOFF_T_MAX) - ret = -EOVERFLOW; - if (f.l_len > COMPAT_LOFF_T_MAX) - f.l_len = COMPAT_LOFF_T_MAX; - if (ret == 0) - ret = put_compat_flock64(&f, compat_ptr(arg)); - } + err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; - default: - ret = sys_fcntl(fd, cmd, arg); + err = do_fcntl(fd, cmd, arg, f.file); break; } - return ret; + out_put: + fdput(f); + return err; }
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, diff --combined include/linux/fs.h index 771fe1131467,25ee1ff6d45b..9a540d399d1b --- a/include/linux/fs.h +++ b/include/linux/fs.h @@@ -2,7 -2,7 +2,7 @@@ #define _LINUX_FS_H
#include <linux/linkage.h> -#include <linux/wait.h> +#include <linux/wait_bit.h> #include <linux/kdev_t.h> #include <linux/dcache.h> #include <linux/path.h> @@@ -20,7 -20,6 +20,7 @@@ #include <linux/rwsem.h> #include <linux/capability.h> #include <linux/semaphore.h> +#include <linux/fcntl.h> #include <linux/fiemap.h> #include <linux/rculist_bl.h> #include <linux/atomic.h> @@@ -31,7 -30,6 +31,7 @@@ #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> #include <linux/delayed_call.h> +#include <linux/uuid.h>
#include <asm/byteorder.h> #include <uapi/linux/fs.h> @@@ -144,9 -142,6 +144,9 @@@ typedef int (dio_iodone_t)(struct kioc /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
+/* File is capable of returning -EAGAIN if AIO will block */ +#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@@ -266,18 -261,6 +266,18 @@@ struct page struct address_space; struct writeback_control;
+/* + * Write life time hint values. + */ +enum rw_hint { + WRITE_LIFE_NOT_SET = 0, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +}; + #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) @@@ -285,7 -268,6 +285,7 @@@ #define IOCB_DSYNC (1 << 4) #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) +#define IOCB_NOWAIT (1 << 7)
struct kiocb { struct file *ki_filp; @@@ -293,7 -275,6 +293,7 @@@ void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; + enum rw_hint ki_hint; };
static inline bool is_sync_kiocb(struct kiocb *kiocb) @@@ -301,6 -282,16 +301,6 @@@ return kiocb->ki_complete == NULL; }
-static inline int iocb_flags(struct file *file); - -static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -{ - *kiocb = (struct kiocb) { - .ki_filp = filp, - .ki_flags = iocb_flags(filp), - }; -} - /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet @@@ -601,7 -592,6 +601,7 @@@ struct inode spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; unsigned int i_blkbits; + enum rw_hint i_write_hint; blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED @@@ -856,7 -846,6 +856,7 @@@ struct file * Must not be taken from IRQ context. */ spinlock_t f_lock; + enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@@ -1032,6 -1021,8 +1032,6 @@@ struct file_lock_context #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif
-#include <linux/fcntl.h> - extern void send_sigio(struct fown_struct *fown, int fd, int band);
/* @@@ -1047,14 -1038,14 +1047,14 @@@ static inline struct inode *locks_inode }
#ifdef CONFIG_FILE_LOCKING - extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); + extern int fcntl_getlk(struct file *, unsigned int, struct flock *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, - struct flock __user *); + struct flock *);
#if BITS_PER_LONG == 32 - extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); + extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, - struct flock64 __user *); + struct flock64 *); #endif
extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); @@@ -1258,7 -1249,7 +1258,7 @@@ extern void fasync_free(struct fasync_s extern void kill_fasync(struct fasync_struct **, int, int);
extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); - extern void f_setown(struct file *filp, unsigned long arg, int force); + extern int f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); @@@ -1337,8 -1328,8 +1337,8 @@@ struct super_block
struct sb_writers s_writers;
- char s_id[32]; /* Informational name */ - u8 s_uuid[16]; /* UUID */ + char s_id[32]; /* Informational name */ + uuid_t s_uuid; /* UUID */
void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; @@@ -1882,25 -1873,6 +1882,25 @@@ static inline bool HAS_UNMAPPED_ID(stru return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); }
+static inline enum rw_hint file_write_hint(struct file *file) +{ + if (file->f_write_hint != WRITE_LIFE_NOT_SET) + return file->f_write_hint; + + return file_inode(file)->i_write_hint; +} + +static inline int iocb_flags(struct file *file); + +static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + .ki_flags = iocb_flags(filp), + .ki_hint = file_write_hint(filp), + }; +} + /* * Inode state bits. Protected by inode->i_lock * @@@ -2545,8 -2517,6 +2545,8 @@@ extern int filemap_fdatawait(struct add extern void filemap_fdatawait_keep_errors(struct address_space *); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); +extern bool filemap_range_has_page(struct address_space *, loff_t lstart, + loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); @@@ -2873,7 -2843,7 +2873,7 @@@ enum DIO_SKIP_DIO_COUNT = 0x08, };
-void dio_end_io(struct bio *bio, int error); +void dio_end_io(struct bio *bio);
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, @@@ -3086,25 -3056,6 +3086,25 @@@ static inline int iocb_flags(struct fil return res; }
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) +{ + if (unlikely(flags & ~RWF_SUPPORTED)) + return -EOPNOTSUPP; + + if (flags & RWF_NOWAIT) { + if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT)) + return -EOPNOTSUPP; + ki->ki_flags |= IOCB_NOWAIT; + } + if (flags & RWF_HIPRI) + ki->ki_flags |= IOCB_HIPRI; + if (flags & RWF_DSYNC) + ki->ki_flags |= IOCB_DSYNC; + if (flags & RWF_SYNC) + ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + return 0; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; diff --combined net/socket.c index 8f9dab330d57,a30a1e324390..59e902b9df09 --- a/net/socket.c +++ b/net/socket.c @@@ -461,7 -461,7 +461,7 @@@ EXPORT_SYMBOL(sock_from_file) * @err: pointer to an error code return * * The file handle passed in is locked and the socket it is bound - * too is returned. If an error occurs the err pointer is overwritten + * to is returned. If an error occurs the err pointer is overwritten * with a negative errno code and NULL is returned. The function checks * for both invalid handles and passing a handle which is not a socket. * @@@ -662,40 -662,6 +662,40 @@@ static bool skb_is_err_queue(const stru return skb->pkt_type == PACKET_OUTGOING; }
+/* On transmit, software and hardware timestamps are returned independently. + * As the two skb clones share the hardware timestamp, which may be updated + * before the software timestamp is received, a hardware TX timestamp may be + * returned only if there is no software TX timestamp. Ignore false software + * timestamps, which may be made in the __sock_recv_timestamp() call when the + * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a + * hardware timestamp. + */ +static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) +{ + return skb->tstamp && !false_tstamp && skb_is_err_queue(skb); +} + +static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) +{ + struct scm_ts_pktinfo ts_pktinfo; + struct net_device *orig_dev; + + if (!skb_mac_header_was_set(skb)) + return; + + memset(&ts_pktinfo, 0, sizeof(ts_pktinfo)); + + rcu_read_lock(); + orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); + if (orig_dev) + ts_pktinfo.if_index = orig_dev->ifindex; + rcu_read_unlock(); + + ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO, + sizeof(ts_pktinfo), &ts_pktinfo); +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@@ -704,16 -670,14 +704,16 @@@ void __sock_recv_timestamp(struct msghd { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); struct scm_timestamping tss; - int empty = 1; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
/* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ - if (need_software_tstamp && skb->tstamp == 0) + if (need_software_tstamp && skb->tstamp == 0) { __net_timestamp(skb); + false_tstamp = 1; + }
if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { @@@ -735,13 -699,8 +735,13 @@@ empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) + !skb_is_swtx_tstamp(skb, false_tstamp) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; + if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && + !skb_is_err_queue(skb)) + put_ts_pktinfo(msg, skb); + } if (!empty) { put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(tss), &tss); @@@ -991,8 -950,7 +991,7 @@@ static long sock_ioctl(struct file *fil err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - f_setown(sock->file, pid, 1); - err = 0; + err = f_setown(sock->file, pid, 1); break; case FIOGETOWN: case SIOCGPGRP: