The following commit has been merged in the master branch:

commit 0dfc98ef0068d9604483094f1f2c66b100a9b55b
Merge: 97669a52245749209fb7117e94bdd647265b76a8 ec2f877856e0af3889940e00b160f7f20f8d774f
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Mon Dec 9 11:48:03 2019 +1100
    Merge remote-tracking branch 'kspp/for-next/kspp'

    # Conflicts:
    #	arch/x86/kernel/fpu/xstate.c
diff --combined Documentation/process/coding-style.rst
index ada573b7d703,146b3a2c661c..e7c60f5339b2
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@@ -56,7 -56,7 +56,7 @@@ instead of ``double-indenting`` the ``c
  	case 'K':
  	case 'k':
  		mem <<= 10;
- 		/* fall through */
+ 		fallthrough;
  	default:
  		break;
  	}
@@@ -988,7 -988,7 +988,7 @@@ Similarly, if you need to calculate th

  .. code-block:: c

- 	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+ 	#define sizeof_member(t, f) (sizeof(((t*)0)->f))
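For reference, the renamed helper behaves exactly like the old FIELD_SIZEOF():
it yields the size of a struct member without needing an object, because the
null pointer is only used inside sizeof() and is never dereferenced.  A
minimal userspace sketch (struct sample is invented for the demo):

    #include <stdio.h>

    #define sizeof_member(t, f) (sizeof(((t *)0)->f))

    struct sample {                 /* stand-in struct for the demo */
            char name[16];
            unsigned long long id;
    };

    int main(void)
    {
            printf("name: %zu bytes\n", sizeof_member(struct sample, name));
            printf("id:   %zu bytes\n", sizeof_member(struct sample, id));
            return 0;
    }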
  There are also min() and max() macros that do strict type checking if
  you need them.  Feel free to peruse that header file to see what else
  is already defined that you shouldn't reproduce in your code.

diff --combined arch/arm64/include/asm/processor.h
index 5ba63204d078,2d61a909984d..257b742b960d
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@@ -9,7 -9,7 +9,7 @@@
  #define __ASM_PROCESSOR_H
  #define KERNEL_DS	UL(-1)
- #define USER_DS	((UL(1) << MAX_USER_VA_BITS) - 1)
+ #define USER_DS	((UL(1) << VA_BITS) - 1)
/* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is @@@ -26,12 -26,10 +26,12 @@@ #include <linux/init.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/thread_info.h>
#include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/hw_breakpoint.h> +#include <asm/kasan.h> #include <asm/lse.h> #include <asm/pgtable-hwdef.h> #include <asm/pointer_auth.h> @@@ -154,13 -152,13 +154,13 @@@ static inline void arch_thread_struct_w unsigned long *size) { /* Verify that there is no padding among the whitelisted fields: */ - BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) != - sizeof_field(struct thread_struct, uw.tp_value) + - sizeof_field(struct thread_struct, uw.tp2_value) + - sizeof_field(struct thread_struct, uw.fpsimd_state)); + BUILD_BUG_ON(sizeof_member(struct thread_struct, uw) != + sizeof_member(struct thread_struct, uw.tp_value) + + sizeof_member(struct thread_struct, uw.tp2_value) + + sizeof_member(struct thread_struct, uw.fpsimd_state));
*offset = offsetof(struct thread_struct, uw); - *size = sizeof_field(struct thread_struct, uw); + *size = sizeof_member(struct thread_struct, uw); }
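The BUILD_BUG_ON() above is a compile-time guard against padding creeping in
between the whitelisted fields.  Outside the kernel the same layout check can
be expressed with C11 static_assert; the structs below are simplified
stand-ins, not the real thread_struct:

    #include <assert.h>

    #define sizeof_member(t, f) (sizeof(((t *)0)->f))

    struct uw_like {
            unsigned long tp_value;
            unsigned long tp2_value;
    };

    struct thread_like {
            int pad;
            struct uw_like uw;
    };

    /* Fails to compile if padding ever appears between the fields. */
    static_assert(sizeof_member(struct thread_like, uw) ==
                  sizeof_member(struct thread_like, uw.tp_value) +
                  sizeof_member(struct thread_like, uw.tp2_value),
                  "unexpected padding in uw");

    int main(void) { return 0; }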
#ifdef CONFIG_COMPAT @@@ -216,18 -214,6 +216,18 @@@ static inline void start_thread(struct regs->sp = sp; }
+static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) diff --combined arch/x86/kernel/fpu/xstate.c index 319be936c348,023b0a28e13b..4b03ce58537d --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@@ -60,7 -60,7 +60,7 @@@ u64 xfeatures_mask __read_mostly
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; -static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; +static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
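The new xstate_comp_offsets definition leans on GCC's designated range
initializer so that every slot starts out as -1, meaning "offset not yet
computed".  A standalone illustration of the [first ... last] syntax
(XFEATURE_MAX is given a stand-in value; the extension is supported by GCC
and Clang):

    #include <stdio.h>

    #define XFEATURE_MAX 10   /* stand-in value for this demo */

    static int offsets[XFEATURE_MAX] = { [0 ... XFEATURE_MAX - 1] = -1 };

    int main(void)
    {
            for (int i = 0; i < XFEATURE_MAX; i++)
                    printf("offsets[%d] = %d\n", i, offsets[i]);
            return 0;
    }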
/* * The XSAVE area of kernel can be in standard or compacted format; @@@ -254,13 -254,10 +254,13 @@@ static void __init setup_xstate_feature * in the fixed offsets in the xsave area in either compacted form * or standard form. */ - xstate_offsets[0] = 0; - xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); - xstate_offsets[1] = xstate_sizes[0]; - xstate_sizes[1] = sizeof_member(struct fxregs_state, xmm_space); + xstate_offsets[XFEATURE_FP] = 0; + xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state, + xmm_space); + + xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; - xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state, - xmm_space); ++ xstate_sizes[XFEATURE_SSE] = sizeof_member(struct fxregs_state, ++ xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (!xfeature_enabled(i)) @@@ -345,7 -342,7 +345,7 @@@ static int xfeature_is_aligned(int xfea */ static void __init setup_xstate_comp(void) { - unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; + unsigned int xstate_comp_sizes[XFEATURE_MAX]; int i;
/* @@@ -353,9 -350,8 +353,9 @@@ * in the fixed offsets in the xsave area in either compacted form * or standard form. */ - xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); + xstate_comp_offsets[XFEATURE_FP] = 0; + xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state, + xmm_space);
if (!boot_cpu_has(X86_FEATURE_XSAVES)) { for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { @@@ -844,7 -840,7 +844,7 @@@ void *get_xsave_addr(struct xregs_stat
/* * We should not ever be requesting features that we - * have not enabled. Remember that pcntxt_mask is + * have not enabled. Remember that xfeatures_mask is * what we write to the XCR0 register. */ WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), diff --combined block/blk-core.c index 9c121f142a5d,fd6b31d400e7..550e80a955f2 --- a/block/blk-core.c +++ b/block/blk-core.c @@@ -132,9 -132,6 +132,9 @@@ static const char *const blk_op_name[] REQ_OP_NAME(SECURE_ERASE), REQ_OP_NAME(ZONE_RESET), REQ_OP_NAME(ZONE_RESET_ALL), + REQ_OP_NAME(ZONE_OPEN), + REQ_OP_NAME(ZONE_CLOSE), + REQ_OP_NAME(ZONE_FINISH), REQ_OP_NAME(WRITE_SAME), REQ_OP_NAME(WRITE_ZEROES), REQ_OP_NAME(SCSI_IN), @@@ -187,22 -184,6 +187,22 @@@ static const struct [BLK_STS_IOERR] = { -EIO, "I/O" }, };
+#ifdef CONFIG_BLK_NOTIFICATIONS +static const +enum block_notification_type blk_notifications[ARRAY_SIZE(blk_errors)] = { + [BLK_STS_TIMEOUT] = NOTIFY_BLOCK_ERROR_TIMEOUT, + [BLK_STS_NOSPC] = NOTIFY_BLOCK_ERROR_NO_SPACE, + [BLK_STS_TRANSPORT] = NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT, + [BLK_STS_TARGET] = NOTIFY_BLOCK_ERROR_CRITICAL_TARGET, + [BLK_STS_NEXUS] = NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS, + [BLK_STS_MEDIUM] = NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM, + [BLK_STS_PROTECTION] = NOTIFY_BLOCK_ERROR_PROTECTION, + [BLK_STS_RESOURCE] = NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE, + [BLK_STS_DEV_RESOURCE] = NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE, + [BLK_STS_IOERR] = NOTIFY_BLOCK_ERROR_IO, +}; +#endif + blk_status_t errno_to_blk_status(int errno) { int i; @@@ -243,19 -224,6 +243,19 @@@ static void print_req_error(struct requ req->cmd_flags & ~REQ_OP_MASK, req->nr_phys_segments, IOPRIO_PRIO_CLASS(req->ioprio)); + +#ifdef CONFIG_BLK_NOTIFICATIONS + if (blk_notifications[idx]) { + struct block_notification n = { + .watch.type = WATCH_TYPE_BLOCK_NOTIFY, + .watch.subtype = blk_notifications[idx], + .watch.info = watch_sizeof(n), + .dev = req->rq_disk ? disk_devt(req->rq_disk) : 0, + .sector = blk_rq_pos(req), + }; + post_block_notification(&n); + } +#endif }
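The blk_notifications[] table turns error-to-notification dispatch into a
sparse array lookup: an entry left at zero means "post nothing for this
status".  A toy model of the same pattern, with invented enum names:

    #include <stdio.h>

    enum blk_status { STS_OK, STS_TIMEOUT, STS_MEDIUM, STS_MAX };
    enum notify { NOTIFY_NONE, NOTIFY_TIMEOUT, NOTIFY_MEDIUM };

    static const enum notify notifications[STS_MAX] = {
            [STS_TIMEOUT] = NOTIFY_TIMEOUT,
            [STS_MEDIUM]  = NOTIFY_MEDIUM,
    };

    static void report(enum blk_status sts)
    {
            if (notifications[sts])
                    printf("status %d -> notification %d\n",
                           sts, notifications[sts]);
            else
                    printf("status %d -> no notification\n", sts);
    }

    int main(void)
    {
            report(STS_OK);       /* no notification */
            report(STS_TIMEOUT);  /* mapped */
            return 0;
    }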
static void req_bio_endio(struct request *rq, struct bio *bio, @@@ -368,14 -336,14 +368,14 @@@ EXPORT_SYMBOL_GPL(blk_set_queue_dying) */ void blk_cleanup_queue(struct request_queue *q) { + WARN_ON_ONCE(blk_queue_registered(q)); + /* mark @q DYING, no new request or merges will be allowed afterwards */ - mutex_lock(&q->sysfs_lock); blk_set_queue_dying(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); blk_queue_flag_set(QUEUE_FLAG_DYING, q); - mutex_unlock(&q->sysfs_lock);
/* * Drain all requests queued before DYING marking. Set DEAD flag to @@@ -880,7 -848,11 +880,7 @@@ static inline int blk_partition_remap(s if (unlikely(bio_check_ro(bio, p))) goto out;
- /* - * Zone reset does not include bi_size so bio_sectors() is always 0. - * Include a test for the reset op code and perform the remap if needed. - */ - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { + if (bio_sectors(bio)) { if (bio_check_eod(bio, part_nr_sects_read(p))) goto out; bio->bi_iter.bi_sector += p->start_sect; @@@ -964,9 -936,6 +964,9 @@@ generic_make_request_checks(struct bio goto not_supported; break; case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!blk_queue_is_zoned(q)) goto not_supported; break; @@@ -1821,9 -1790,9 +1821,9 @@@ int __init blk_dev_init(void { BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * - FIELD_SIZEOF(struct request, cmd_flags)); + sizeof_member(struct request, cmd_flags)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * - FIELD_SIZEOF(struct bio, bi_opf)); + sizeof_member(struct bio, bi_opf));
/* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", diff --combined crypto/adiantum.c index aded26092268,d79d901afd28..cb7ba22e3a62 --- a/crypto/adiantum.c +++ b/crypto/adiantum.c @@@ -33,7 -33,6 +33,7 @@@ #include <crypto/b128ops.h> #include <crypto/chacha.h> #include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> #include <crypto/internal/skcipher.h> #include <crypto/nhpoly1305.h> #include <crypto/scatterwalk.h> @@@ -243,11 -242,11 +243,11 @@@ static void adiantum_hash_header(struc
BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state, &tctx->header_hash_key, - &header, sizeof(header) / POLY1305_BLOCK_SIZE); + &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, - TWEAK_SIZE / POLY1305_BLOCK_SIZE); + TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
poly1305_core_emit(&state, &rctx->header_hash); } @@@ -436,10 -435,10 +436,10 @@@ static int adiantum_init_tfm(struct cry
BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) != sizeof(struct adiantum_request_ctx)); - subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx, + subreq_size = max(sizeof_member(struct adiantum_request_ctx, u.hash_desc) + crypto_shash_descsize(hash), - FIELD_SIZEOF(struct adiantum_request_ctx, + sizeof_member(struct adiantum_request_ctx, u.streamcipher_req) + crypto_skcipher_reqsize(streamcipher));
diff --combined crypto/essiv.c index 808f2b362106,fd5a87baafff..75d810d98f8e --- a/crypto/essiv.c +++ b/crypto/essiv.c @@@ -188,7 -188,8 +188,7 @@@ static void essiv_aead_done(struct cryp struct aead_request *req = areq->data; struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
- if (rctx->assoc) - kfree(rctx->assoc); + kfree(rctx->assoc); aead_request_complete(req, err); }
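The hunk above drops a redundant NULL check: kfree(NULL), like libc free(),
is a documented no-op.  Trivial userspace demonstration:

    #include <stdlib.h>

    int main(void)
    {
            char *p = NULL;

            free(p);   /* free(NULL) is defined to do nothing */
            return 0;
    }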
@@@ -347,7 -348,7 +347,7 @@@ static int essiv_aead_init_tfm(struct c if (IS_ERR(aead)) return PTR_ERR(aead);
- subreq_size = FIELD_SIZEOF(struct essiv_aead_request_ctx, aead_req) + + subreq_size = sizeof_member(struct essiv_aead_request_ctx, aead_req) + crypto_aead_reqsize(aead);
tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) + @@@ -485,7 -486,7 +485,7 @@@ static int essiv_create(struct crypto_t type = algt->type & algt->mask;
switch (type) { - case CRYPTO_ALG_TYPE_BLKCIPHER: + case CRYPTO_ALG_TYPE_SKCIPHER: skcipher_inst = kzalloc(sizeof(*skcipher_inst) + sizeof(*ictx), GFP_KERNEL); if (!skcipher_inst) @@@ -585,7 -586,7 +585,7 @@@ base->cra_alignmask = block_base->cra_alignmask; base->cra_priority = block_base->cra_priority;
- if (type == CRYPTO_ALG_TYPE_BLKCIPHER) { + if (type == CRYPTO_ALG_TYPE_SKCIPHER) { skcipher_inst->alg.setkey = essiv_skcipher_setkey; skcipher_inst->alg.encrypt = essiv_skcipher_encrypt; skcipher_inst->alg.decrypt = essiv_skcipher_decrypt; @@@ -627,7 -628,7 +627,7 @@@ out_free_hash: crypto_mod_put(_hash_alg); out_drop_skcipher: - if (type == CRYPTO_ALG_TYPE_BLKCIPHER) + if (type == CRYPTO_ALG_TYPE_SKCIPHER) crypto_drop_skcipher(&ictx->u.skcipher_spawn); else crypto_drop_aead(&ictx->u.aead_spawn); diff --combined drivers/firmware/efi/efi.c index b0961950d918,17d32fb72c9a..bfd74ad2b22d --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@@ -81,11 -81,6 +81,11 @@@ bool efi_runtime_disabled(void return disable_runtime; }
+bool __pure __efi_soft_reserve_enabled(void) +{ + return !efi_enabled(EFI_MEM_NO_SOFT_RESERVE); +} + static int __init parse_efi_cmdline(char *str) { if (!str) { @@@ -99,9 -94,6 +99,9 @@@ if (parse_option_str(str, "noruntime")) disable_runtime = true;
+ if (parse_option_str(str, "nosoftreserve")) + set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); + return 0; } early_param("efi", parse_efi_cmdline); @@@ -275,9 -267,6 +275,9 @@@ static __init int efivar_ssdt_load(void void *data; int ret;
+ if (!efivar_ssdt[0]) + return 0; + ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
list_for_each_entry_safe(entry, aux, &entries, list) { @@@ -304,7 -293,7 +304,7 @@@ goto free_data; }
- ret = acpi_load_table(data); + ret = acpi_load_table(data, NULL); if (ret) { pr_err("failed to load table: %d\n", ret); goto free_data; @@@ -562,7 -551,7 +562,7 @@@ int __init efi_config_parse_tables(voi sizeof(*seed) + size); if (seed != NULL) { pr_notice("seeding entropy pool\n"); - add_device_randomness(seed->bits, seed->size); + add_bootloader_randomness(seed->bits, seed->size); early_memunmap(seed, sizeof(*seed) + size); } else { pr_err("Could not map UEFI random seed!\n"); @@@ -681,7 -670,7 +681,7 @@@ device_initcall(efi_load_efivars) { name }, \ { prop }, \ offsetof(struct efi_fdt_params, field), \ - FIELD_SIZEOF(struct efi_fdt_params, field) \ + sizeof_member(struct efi_fdt_params, field) \ }
struct params { @@@ -850,16 -839,15 +850,16 @@@ char * __init efi_md_typeattr_format(ch if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | - EFI_MEMORY_NV | + EFI_MEMORY_NV | EFI_MEMORY_SP | EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else snprintf(pos, size, - "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", + attr & EFI_MEMORY_SP ? "SP" : "", attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", @@@ -979,24 -967,6 +979,24 @@@ static int __init efi_memreserve_map_ro return 0; }
+static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) +{ + struct resource *res, *parent; + + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) + return -ENOMEM; + + res->name = "reserved"; + res->flags = IORESOURCE_MEM; + res->start = addr; + res->end = addr + size - 1; + + /* we expect a conflict with a 'System RAM' region */ + parent = request_resource_conflict(&iomem_resource, res); + return parent ? request_resource(parent, res) : 0; +} + int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { struct linux_efi_memreserve *rsv; @@@ -1021,7 -991,7 +1021,7 @@@ rsv->entry[index].size = size;
memunmap(rsv); - return 0; + return efi_mem_reserve_iomem(addr, size); } memunmap(rsv); } @@@ -1031,12 -1001,6 +1031,12 @@@ if (!rsv) return -ENOMEM;
+ rc = efi_mem_reserve_iomem(__pa(rsv), SZ_4K); + if (rc) { + free_page((unsigned long)rsv); + return rc; + } + /* * The memremap() call above assumes that a linux_efi_memreserve entry * never crosses a page boundary, so let's ensure that this remains true @@@ -1053,7 -1017,7 +1053,7 @@@ efi_memreserve_root->next = __pa(rsv); spin_unlock(&efi_mem_reserve_persistent_lock);
- return 0; + return efi_mem_reserve_iomem(addr, size); }
static int __init efi_memreserve_root_init(void) diff --combined drivers/gpu/drm/i915/gvt/scheduler.c index 5b2a7d072ec9,76c2db58adcd..12c69712c5b6 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@@ -38,7 -38,6 +38,7 @@@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h"
#include "i915_drv.h" #include "gvt.h" @@@ -195,7 -194,7 +195,7 @@@ static int populate_shadow_context(stru return -EFAULT; }
- page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, I915_GTT_PAGE_SIZE); @@@ -366,8 -365,7 +366,8 @@@ static void set_context_ppgtt_from_shad struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); + struct i915_ppgtt *ppgtt = + i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); int i = 0;
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@@ -380,8 -378,6 +380,8 @@@ px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } + + i915_vm_put(&ppgtt->vm); }
static int @@@ -389,8 -385,11 +389,8 @@@ intel_gvt_workload_req_alloc(struct int { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq;
- lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->req) return 0;
@@@ -416,9 -415,10 +416,9 @@@ int intel_gvt_scan_and_shadow_workload( { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&vgpu->vgpu_lock);
if (workload->shadow) return 0; @@@ -580,6 -580,8 +580,6 @@@ static void update_vreg_in_ctx(struct i
static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) { - struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_shadow_bb *bb, *pos;
if (list_empty(&workload->shadow_bb)) @@@ -588,6 -590,8 +588,6 @@@ bb = list_first_entry(&workload->shadow_bb, struct intel_vgpu_shadow_bb, list);
- mutex_lock(&dev_priv->drm.struct_mutex); - list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { if (bb->accessing) @@@ -605,6 -609,8 +605,6 @@@ list_del(&bb->list); kfree(bb); } - - mutex_unlock(&dev_priv->drm.struct_mutex); }
static int prepare_workload(struct intel_vgpu_workload *workload) @@@ -679,6 -685,7 +679,6 @@@ err_unpin_mm static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@@ -687,6 -694,7 +687,6 @@@ ring_id, workload);
mutex_lock(&vgpu->vgpu_lock); - mutex_lock(&dev_priv->drm.struct_mutex);
ret = intel_gvt_workload_req_alloc(workload); if (ret) @@@ -721,6 -729,7 +721,6 @@@ out err_req: if (ret) workload->status = ret; - mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&vgpu->vgpu_lock); return ret; } @@@ -835,7 -844,7 +835,7 @@@ static void update_guest_context(struc return; }
- page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, I915_GTT_PAGE_SIZE); @@@ -878,7 -887,7 +878,7 @@@ void intel_vgpu_clean_workloads(struct intel_engine_mask_t tmp;
/* free the unsubmited workloads in the queues. */ - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); @@@ -1224,18 -1233,20 +1224,18 @@@ int intel_vgpu_setup_submission(struct struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; struct i915_gem_context *ctx; + struct i915_ppgtt *ppgtt; enum intel_engine_id i; int ret;
- mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } + if (IS_ERR(ctx)) + return PTR_ERR(ctx);
i915_gem_context_set_force_single_submission(ctx);
- i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm)); + ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); + i915_context_ppgtt_root_save(s, ppgtt);
for_each_engine(engine, i915, i) { struct intel_context *ce; @@@ -1269,7 -1280,7 +1269,7 @@@ sizeof(struct intel_vgpu_workload), 0, SLAB_HWCACHE_ALIGN, offsetof(struct intel_vgpu_workload, rb_tail), - sizeof_field(struct intel_vgpu_workload, rb_tail), + sizeof_member(struct intel_vgpu_workload, rb_tail), NULL);
if (!s->workloads) { @@@ -1280,12 -1291,12 +1280,12 @@@ atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+ i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); - mutex_unlock(&i915->drm.struct_mutex); return 0;
out_shadow_ctx: - i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm)); + i915_context_ppgtt_root_restore(s, ppgtt); for_each_engine(engine, i915, i) { if (IS_ERR(s->shadow[i])) break; @@@ -1293,8 -1304,9 +1293,8 @@@ intel_context_unpin(s->shadow[i]); intel_context_put(s->shadow[i]); } + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; }
@@@ -1585,7 -1597,9 +1585,7 @@@ intel_vgpu_create_workload(struct intel */ if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); }
diff --combined drivers/infiniband/hw/efa/efa_verbs.c index c9d294caa27a,d5b396913875..0d4177c51881 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@@ -13,6 -13,10 +13,6 @@@
#include "efa.h"
-#define EFA_MMAP_FLAG_SHIFT 56 -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) -#define EFA_MMAP_INVALID U64_MAX - enum { EFA_MMAP_DMA_PAGE = 0, EFA_MMAP_IO_WC, @@@ -23,12 -27,20 +23,12 @@@ (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-struct efa_mmap_entry { - void *obj; +struct efa_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; u64 address; - u64 length; - u32 mmap_page; u8 mmap_flag; };
-static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) -{ - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | - ((u64)efa->mmap_page << PAGE_SHIFT); -} - #define EFA_DEFINE_STATS(op) \ op(EFA_TX_BYTES, "tx_bytes") \ op(EFA_TX_PKTS, "tx_pkts") \ @@@ -70,6 -82,8 +70,6 @@@ static const char *const efa_stats_name #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
-#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE - struct pbl_chunk { dma_addr_t dma_addr; u64 *buf; @@@ -133,19 -147,8 +133,19 @@@ static inline struct efa_ah *to_eah(str return container_of(ibah, struct efa_ah, ibah); }
+static inline struct efa_user_mmap_entry * +to_emmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); +} + +static inline bool is_rdma_read_cap(struct efa_dev *dev) +{ + return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ - FIELD_SIZEOF(typeof(x), fld) <= (sz)) + sizeof_member(typeof(x), fld) <= (sz))
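field_avail() keeps its meaning after the sizeof_member() rename: a response
field may only be copied out if the length the caller supplied covers it,
which is how the driver stays compatible with older, shorter userspace
structs.  A self-contained sketch of the idiom (struct resp_v2 is invented;
typeof is a GNU extension):

    #include <stdio.h>
    #include <stddef.h>

    #define sizeof_member(t, f) (sizeof(((t *)0)->f))
    #define field_avail(x, fld, sz) \
            (offsetof(typeof(x), fld) + sizeof_member(typeof(x), fld) <= (sz))

    struct resp_v2 {
            unsigned int a;   /* present since v1 */
            unsigned int b;   /* added in v2 */
    };

    int main(void)
    {
            struct resp_v2 r;
            size_t v1_len = sizeof(unsigned int);   /* old caller's size */

            printf("b visible to v1 caller? %d\n", field_avail(r, b, v1_len));
            printf("b visible to v2 caller? %d\n", field_avail(r, b, sizeof(r)));
            return 0;
    }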
#define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@@ -169,6 -172,106 +169,6 @@@ static void *efa_zalloc_mapped(struct e return addr; }
-/* - * This is only called when the ucontext is destroyed and there can be no - * concurrent query via mmap or allocate on the xarray, thus we can be sure no - * other thread is using the entry pointer. We also know that all the BAR - * pages have either been zap'd or munmaped at this point. Normal pages are - * refcounted and will be freed at the proper time. - */ -static void mmap_entries_remove_free(struct efa_dev *dev, - struct efa_ucontext *ucontext) -{ - struct efa_mmap_entry *entry; - unsigned long mmap_page; - - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { - xa_erase(&ucontext->mmap_xa, mmap_page); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, get_mmap_key(entry), entry->address, - entry->length); - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - /* DMA mapping is already gone, now free the pages */ - free_pages_exact(phys_to_virt(entry->address), - entry->length); - kfree(entry); - } -} - -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, - struct efa_ucontext *ucontext, - u64 key, u64 len) -{ - struct efa_mmap_entry *entry; - u64 mmap_page; - - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; - if (mmap_page > U32_MAX) - return NULL; - - entry = xa_load(&ucontext->mmap_xa, mmap_page); - if (!entry || get_mmap_key(entry) != key || entry->length != len) - return NULL; - - ibdev_dbg(&dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, key, entry->address, entry->length); - - return entry; -} - -/* - * Note this locking scheme cannot support removal of entries, except during - * ucontext destruction when the core code guarentees no concurrency. - */ -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, - void *obj, u64 address, u64 length, u8 mmap_flag) -{ - struct efa_mmap_entry *entry; - u32 next_mmap_page; - int err; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return EFA_MMAP_INVALID; - - entry->obj = obj; - entry->address = address; - entry->length = length; - entry->mmap_flag = mmap_flag; - - xa_lock(&ucontext->mmap_xa); - if (check_add_overflow(ucontext->mmap_xa_page, - (u32)(length >> PAGE_SHIFT), - &next_mmap_page)) - goto err_unlock; - - entry->mmap_page = ucontext->mmap_xa_page; - ucontext->mmap_xa_page = next_mmap_page; - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, - GFP_KERNEL); - if (err) - goto err_unlock; - - xa_unlock(&ucontext->mmap_xa); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", - entry->obj, entry->address, entry->length, get_mmap_key(entry)); - - return get_mmap_key(entry); - -err_unlock: - xa_unlock(&ucontext->mmap_xa); - kfree(entry); - return EFA_MMAP_INVALID; - -} - int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@@ -203,17 -306,12 +203,17 @@@ dev_attr->max_rq_depth); props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; + props->max_sge_rd = dev_attr->max_wr_rdma_sge;
if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; resp.max_rq_sge = dev_attr->max_rq_sge; resp.max_sq_wr = dev_attr->max_sq_depth; resp.max_rq_wr = dev_attr->max_rq_depth; + resp.max_rdma_size = dev_attr->max_rdma_size; + + if (is_rdma_read_cap(dev)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); @@@ -240,9 -338,9 +240,9 @@@ int efa_query_port(struct ib_device *ib props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; props->active_width = IB_WIDTH_4X; - props->max_mtu = ib_mtu_int_to_enum(dev->mtu); - props->active_mtu = ib_mtu_int_to_enum(dev->mtu); - props->max_msg_sz = dev->mtu; + props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->max_msg_sz = dev->dev_attr.mtu; props->max_vl_num = 1;
return 0; @@@ -303,7 -401,7 +303,7 @@@ int efa_query_gid(struct ib_device *ibd { struct efa_dev *dev = to_edev(ibdev);
- memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
return 0; } @@@ -387,19 -485,8 +387,19 @@@ static int efa_destroy_qp_handle(struc return efa_com_destroy_qp(&dev->edev, ¶ms); }
+static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, + struct efa_qp *qp) +{ + rdma_user_mmap_entry_remove(qp->rq_mmap_entry); + rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); + rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); + rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); +} + int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, ibucontext); struct efa_dev *dev = to_edev(ibqp->pd->device); struct efa_qp *qp = to_eqp(ibqp); int err; @@@ -418,101 -505,61 +418,101 @@@ DMA_TO_DEVICE); }
+ efa_qp_user_mmap_entries_remove(ucontext, qp); kfree(qp); return 0; }
+static struct rdma_user_mmap_entry* +efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, + u64 address, size_t length, + u8 mmap_flag, u64 *offset) +{ + struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int err; + + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_flag = mmap_flag; + + err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, + length); + if (err) { + kfree(entry); + return NULL; + } + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + static int qp_mmap_entries_setup(struct efa_qp *qp, struct efa_dev *dev, struct efa_ucontext *ucontext, struct efa_com_create_qp_params *params, struct efa_ibv_create_qp_resp *resp) { - /* - * Once an entry is inserted it might be mmapped, hence cannot be - * cleaned up until dealloc_ucontext. - */ - resp->sq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->sq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + size_t length; + u64 address; + + address = dev->db_bar_addr + resp->sq_db_offset; + qp->sq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->sq_db_mmap_key); + if (!qp->sq_db_mmap_entry) return -ENOMEM;
resp->sq_db_offset &= ~PAGE_MASK;
- resp->llq_desc_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->mem_bar_addr + resp->llq_desc_offset, - PAGE_ALIGN(params->sq_ring_size_in_bytes + - (resp->llq_desc_offset & ~PAGE_MASK)), - EFA_MMAP_IO_WC); - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->mem_bar_addr + resp->llq_desc_offset; + length = PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)); + + qp->llq_desc_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, length, + EFA_MMAP_IO_WC, + &resp->llq_desc_mmap_key); + if (!qp->llq_desc_mmap_entry) + goto err_remove_mmap;
resp->llq_desc_offset &= ~PAGE_MASK;
if (qp->rq_size) { - resp->rq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->rq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->db_bar_addr + resp->rq_db_offset; + + qp->rq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, PAGE_SIZE, + EFA_MMAP_IO_NC, + &resp->rq_db_mmap_key); + if (!qp->rq_db_mmap_entry) + goto err_remove_mmap;
resp->rq_db_offset &= ~PAGE_MASK;
- resp->rq_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - virt_to_phys(qp->rq_cpu_addr), - qp->rq_size, EFA_MMAP_DMA_PAGE); - if (resp->rq_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = virt_to_phys(qp->rq_cpu_addr); + qp->rq_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, qp->rq_size, + EFA_MMAP_DMA_PAGE, + &resp->rq_mmap_key); + if (!qp->rq_mmap_entry) + goto err_remove_mmap;
resp->rq_mmap_size = qp->rq_size; }
return 0; + +err_remove_mmap: + efa_qp_user_mmap_entries_remove(ucontext, qp); + + return -ENOMEM; }
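The new err_remove_mmap label follows the usual kernel unwind style: each
failing setup step jumps to a label that releases everything acquired so far,
so partial setup never leaks.  A generic sketch with stand-in acquire/release
helpers:

    #include <stdlib.h>

    struct res { int dummy; };

    static struct res *acquire(void) { return malloc(sizeof(struct res)); }
    static void release(struct res *r) { free(r); }

    static int setup(struct res **a, struct res **b)
    {
            *a = acquire();
            if (!*a)
                    return -1;

            *b = acquire();
            if (!*b)
                    goto err_release_a;

            return 0;

    err_release_a:
            release(*a);
            *a = NULL;
            return -1;
    }

    int main(void)
    {
            struct res *a, *b;

            return setup(&a, &b) ? 1 : 0;
    }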
static int efa_qp_validate_cap(struct efa_dev *dev, @@@ -587,6 -634,7 +587,6 @@@ struct ib_qp *efa_create_qp(struct ib_p struct efa_dev *dev = to_edev(ibpd->device); struct efa_ibv_create_qp_resp resp = {}; struct efa_ibv_create_qp cmd = {}; - bool rq_entry_inserted = false; struct efa_ucontext *ucontext; struct efa_qp *qp; int err; @@@ -694,6 -742,7 +694,6 @@@ if (err) goto err_destroy_qp;
- rq_entry_inserted = true; qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; qp->ibqp.qp_type = init_attr->qp_type; @@@ -710,7 -759,7 +710,7 @@@ ibdev_dbg(&dev->ibdev, "Failed to copy udata for qp[%u]\n", create_qp_resp.qp_num); - goto err_destroy_qp; + goto err_remove_mmap_entries; } }
@@@ -718,16 -767,13 +718,16 @@@
return &qp->ibqp;
+err_remove_mmap_entries: + efa_qp_user_mmap_entries_remove(ucontext, qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: if (qp->rq_size) { dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, DMA_TO_DEVICE); - if (!rq_entry_inserted) + + if (!qp->rq_mmap_entry) free_pages_exact(qp->rq_cpu_addr, qp->rq_size); } err_free_qp: @@@ -851,18 -897,16 +851,18 @@@ void efa_destroy_cq(struct ib_cq *ibcq efa_destroy_cq_idx(dev, cq->cq_idx); dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + rdma_user_mmap_entry_remove(cq->mmap_entry); }
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, struct efa_ibv_create_cq_resp *resp) { resp->q_mmap_size = cq->size; - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, - virt_to_phys(cq->cpu_addr), - cq->size, EFA_MMAP_DMA_PAGE); - if (resp->q_mmap_key == EFA_MMAP_INVALID) + cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE, + &resp->q_mmap_key); + if (!cq->mmap_entry) return -ENOMEM;
return 0; @@@ -880,6 -924,7 +880,6 @@@ int efa_create_cq(struct ib_cq *ibcq, c struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); - bool cq_entry_inserted = false; int entries = attr->cqe; int err;
@@@ -968,13 -1013,15 +968,13 @@@ goto err_destroy_cq; }
- cq_entry_inserted = true; - if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_destroy_cq; + goto err_remove_mmap; } }
@@@ -983,16 -1030,13 +983,16 @@@
return 0;
+err_remove_mmap: + rdma_user_mmap_entry_remove(cq->mmap_entry); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - if (!cq_entry_inserted) + if (!cq->mmap_entry) free_pages_exact(cq->cpu_addr, cq->size); + err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); return err; @@@ -1352,7 -1396,6 +1352,7 @@@ struct ib_mr *efa_reg_mr(struct ib_pd * struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; struct pbl_context pbl; + int supp_access_flags; unsigned int pg_sz; struct efa_mr *mr; int inline_size; @@@ -1366,14 -1409,10 +1366,14 @@@ goto err_out; }
- if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + supp_access_flags = + IB_ACCESS_LOCAL_WRITE | + (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + + if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", - access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + access_flags, supp_access_flags); err = -EOPNOTSUPP; goto err_out; } @@@ -1384,7 -1423,7 +1384,7 @@@ goto err_out; }
- mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, @@@ -1395,7 -1434,7 +1395,7 @@@ params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; params.mr_length_in_bytes = length; - params.permissions = access_flags & 0x1; + params.permissions = access_flags;
pg_sz = ib_umem_find_best_pgsz(mr->umem, dev->dev_attr.page_size_cap, @@@ -1517,6 -1556,7 +1517,6 @@@ int efa_alloc_ucontext(struct ib_uconte goto err_out;
ucontext->uarn = result.uarn; - xa_init(&ucontext->mmap_xa);
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; @@@ -1545,56 -1585,38 +1545,56 @@@ void efa_dealloc_ucontext(struct ib_uco struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device);
- mmap_entries_remove_free(dev, ucontext); efa_dealloc_uar(dev, ucontext->uarn); }
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); + + /* DMA mapping is already gone, now free the pages */ + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + free_pages_exact(phys_to_virt(entry->address), + entry->rdma_entry.npages * PAGE_SIZE); + kfree(entry); +} + static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, - struct vm_area_struct *vma, u64 key, u64 length) + struct vm_area_struct *vma) { - struct efa_mmap_entry *entry; + struct rdma_user_mmap_entry *rdma_entry; + struct efa_user_mmap_entry *entry; unsigned long va; + int err = 0; u64 pfn; - int err;
- entry = mmap_entry_get(dev, ucontext, key, length); - if (!entry) { - ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", - key); + rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); + if (!rdma_entry) { + ibdev_dbg(&dev->ibdev, + "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } + entry = to_emmap(rdma_entry);
ibdev_dbg(&dev->ibdev, - "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", - entry->address, length, entry->mmap_flag); + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag);
pfn = entry->address >> PAGE_SHIFT; switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_noncached(vma->vm_page_prot)); + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + rdma_entry); break; case EFA_MMAP_IO_WC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_writecombine(vma->vm_page_prot)); + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; @@@ -1611,13 -1633,12 +1611,13 @@@ if (err) { ibdev_dbg( &dev->ibdev, - "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", - entry->address, length, entry->mmap_flag, err); - return err; + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag, err); }
- return 0; + rdma_user_mmap_entry_put(rdma_entry); + return err; }
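After the rework, __efa_mmap() looks the entry up via
rdma_user_mmap_entry_get(), uses it, and drops the reference on success and
failure alike.  The shape of that get/use/put discipline, reduced to a
stand-in refcount:

    #include <stdio.h>

    struct entry { int refcount; };

    static void entry_get(struct entry *e) { e->refcount++; }
    static void entry_put(struct entry *e) { e->refcount--; }

    static int use_entry(struct entry *e)
    {
            int err = 0;

            entry_get(e);
            /* ... map the pages the entry describes; may set err ... */
            entry_put(e);   /* dropped on every return path */
            return err;
    }

    int main(void)
    {
            struct entry e = { .refcount = 1 };
            int err = use_entry(&e);

            printf("err=%d refcount=%d\n", err, e.refcount);
            return 0;
    }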
int efa_mmap(struct ib_ucontext *ibucontext, @@@ -1625,13 -1646,26 +1625,13 @@@ { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - u64 length = vma->vm_end - vma->vm_start; - u64 key = vma->vm_pgoff << PAGE_SHIFT; + size_t length = vma->vm_end - vma->vm_start;
ibdev_dbg(&dev->ibdev, - "start %#lx, end %#lx, length = %#llx, key = %#llx\n", - vma->vm_start, vma->vm_end, length, key); - - if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { - ibdev_dbg(&dev->ibdev, - "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", - length, PAGE_SIZE, vma->vm_flags); - return -EINVAL; - } - - if (vma->vm_flags & VM_EXEC) { - ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); - return -EPERM; - } + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
- return __efa_mmap(dev, ucontext, vma, key, length); + return __efa_mmap(dev, ucontext, vma); }
static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) diff --combined drivers/infiniband/hw/hfi1/sdma.c index 5774dfc22e18,a772955bd2ad..09df585ecee6 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@@ -65,7 -65,6 +65,7 @@@ #define SDMA_DESCQ_CNT 2048 #define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff +#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
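SDMA_PAD picks, at compile time, the larger of the 16B-packet padding and a
u32 so the same pad buffer serves both uses.  A standalone equivalent of the
max_t() idiom (statement expressions are a GNU extension; MAX_16B_PADDING is
given a stand-in value):

    #include <stdio.h>

    #define max_t(type, a, b) \
            ({ type _a = (a); type _b = (b); _a > _b ? _a : _b; })

    #define MAX_16B_PADDING 8   /* stand-in value for this demo */

    int main(void)
    {
            size_t pad = max_t(size_t, MAX_16B_PADDING, sizeof(unsigned int));

            printf("pad = %zu bytes\n", pad);
            return 0;
    }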
static uint sdma_descq_cnt = SDMA_DESCQ_CNT; module_param(sdma_descq_cnt, uint, S_IRUGO); @@@ -848,7 -847,7 +848,7 @@@ static const struct rhashtable_params s .nelem_hint = NR_CPUS_HINT, .head_offset = offsetof(struct sdma_rht_node, node), .key_offset = offsetof(struct sdma_rht_node, cpu_id), - .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id), + .key_len = sizeof_member(struct sdma_rht_node, cpu_id), .max_size = NR_CPUS, .min_size = 8, .automatic_shrinking = true, @@@ -881,8 -880,8 +881,8 @@@ struct sdma_engine *sdma_select_user_en
cpu_id = smp_processor_id(); rcu_read_lock(); - rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id, - sdma_rht_params); + rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id, + sdma_rht_params);
if (rht_node && rht_node->map[vl]) { struct sdma_rht_map_elem *map = rht_node->map[vl]; @@@ -1297,7 -1296,7 +1297,7 @@@ void sdma_clean(struct hfi1_devdata *dd struct sdma_engine *sde;
if (dd->sdma_pad_dma) { - dma_free_coherent(&dd->pcidev->dev, 4, + dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, (void *)dd->sdma_pad_dma, dd->sdma_pad_phys); dd->sdma_pad_dma = NULL; @@@ -1492,7 -1491,7 +1492,7 @@@ int sdma_init(struct hfi1_devdata *dd, }
/* Allocate memory for pad */ - dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32), + dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD, &dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) { dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); @@@ -1527,11 -1526,8 +1527,11 @@@ }
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --combined drivers/infiniband/hw/hfi1/verbs.h index b0e9bf7cd150,95a0c0b73387..c673cdca5689 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@@ -107,9 -107,9 +107,9 @@@ enum HFI1_HAS_GRH = (1 << 0), };
- #define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh)) + #define LRH_16B_BYTES (sizeof_member(struct hfi1_16b_header, lrh)) #define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32)) - #define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh)) + #define LRH_9B_BYTES (sizeof_member(struct ib_header, lrh)) #define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */ @@@ -330,8 -330,9 +330,8 @@@ void hfi1_sys_guid_chg(struct hfi1_ibpo void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index);
/* * The PSN_MASK and PSN_SHIFT allow for diff --combined drivers/md/raid5-ppl.c index cab5b1352892,51e070a9c5e6..fb4e31f19c53 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@@ -1360,7 -1360,7 +1360,7 @@@ int ppl_init_log(struct r5conf *conf return -EINVAL; }
- max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) * + max_disks = sizeof_member(struct ppl_log, disk_flush_bitmap) * BITS_PER_BYTE; if (conf->raid_disks > max_disks) { pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n", @@@ -1404,7 -1404,7 +1404,7 @@@ atomic64_set(&ppl_conf->seq, 0); INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); spin_lock_init(&ppl_conf->no_mem_stripes_lock); - ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET; + ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
if (!mddev->external) { ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d862e9ba27e1,491acfe19e2a..7bf09b6a07fb --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@@ -55,8 -55,6 +55,8 @@@
#define HCLGE_LINK_STATUS_MS 10
+#define HCLGE_VF_VPORT_START_NUM 1 + static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); static int hclge_init_vlan_config(struct hclge_dev *hdev); static void hclge_sync_vlan_filter(struct hclge_dev *hdev); @@@ -325,7 -323,8 +325,7 @@@ static const struct hclge_mac_mgr_tbl_e { .flags = HCLGE_MAC_MGR_MASK_VLAN_B, .ethter_type = cpu_to_le16(ETH_P_LLDP), - .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)), - .mac_addr_lo16 = cpu_to_le16(htons(0x000E)), + .mac_addr = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e}, .i_port_bitmap = 0x1, }, }; @@@ -907,9 -906,6 +907,9 @@@ static int hclge_query_pf_resource(stru hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+ /* nic's msix numbers is always equals to the roce's. */ + hdev->num_nic_msi = hdev->num_roce_msi; + /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. */ @@@ -919,15 -915,6 +919,15 @@@ hdev->num_msi = hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); + + hdev->num_nic_msi = hdev->num_msi; + } + + if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) { + dev_err(&hdev->pdev->dev, + "Just %u msi resources, not enough for pf(min:2).\n", + hdev->num_nic_msi); + return -EINVAL; }
return 0; @@@ -1195,35 -1182,6 +1195,35 @@@ static void hclge_parse_link_mode(struc hclge_parse_backplane_link_mode(hdev, speed_ability); }
+static u32 hclge_get_max_speed(u8 speed_ability) +{ + if (speed_ability & HCLGE_SUPPORT_100G_BIT) + return HCLGE_MAC_SPEED_100G; + + if (speed_ability & HCLGE_SUPPORT_50G_BIT) + return HCLGE_MAC_SPEED_50G; + + if (speed_ability & HCLGE_SUPPORT_40G_BIT) + return HCLGE_MAC_SPEED_40G; + + if (speed_ability & HCLGE_SUPPORT_25G_BIT) + return HCLGE_MAC_SPEED_25G; + + if (speed_ability & HCLGE_SUPPORT_10G_BIT) + return HCLGE_MAC_SPEED_10G; + + if (speed_ability & HCLGE_SUPPORT_1G_BIT) + return HCLGE_MAC_SPEED_1G; + + if (speed_ability & HCLGE_SUPPORT_100M_BIT) + return HCLGE_MAC_SPEED_100M; + + if (speed_ability & HCLGE_SUPPORT_10M_BIT) + return HCLGE_MAC_SPEED_10M; + + return HCLGE_MAC_SPEED_1G; +} + static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) { struct hclge_cfg_param_cmd *req; @@@ -1394,11 -1352,9 +1394,11 @@@ static int hclge_configure(struct hclge
hclge_parse_link_mode(hdev, cfg.speed_ability);
+ hdev->hw.mac.max_speed = hclge_get_max_speed(cfg.speed_ability); + if ((hdev->tc_max > HNAE3_MAX_TC) || (hdev->tc_max < 1)) { - dev_warn(&hdev->pdev->dev, "TC num = %d.\n", + dev_warn(&hdev->pdev->dev, "TC num = %u.\n", hdev->tc_max); hdev->tc_max = 1; } @@@ -1551,10 -1507,6 +1551,10 @@@ static int hclge_assign_tqp(struct hcl kinfo->rss_size = min_t(u16, hdev->rss_size_max, vport->alloc_tqps / hdev->tm_info.num_tc);
+ /* ensure one to one mapping between irq and queue at default */ + kinfo->rss_size = min_t(u16, kinfo->rss_size, + (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc); + return 0; }
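The added clamp keeps rss_size no larger than the NIC vectors available per
traffic class (one vector is reserved), preserving the one-to-one mapping
between queues and interrupts.  Reduced sketch with invented numbers:

    #include <stdio.h>

    #define min_t(type, a, b) \
            ({ type _a = (a); type _b = (b); _a < _b ? _a : _b; })

    int main(void)
    {
            unsigned short rss_size = 16;
            unsigned short num_nic_msi = 9, num_tc = 2;

            /* one vector reserved, the rest split across TCs */
            rss_size = min_t(unsigned short, rss_size,
                             (num_nic_msi - 1) / num_tc);
            printf("rss_size clamped to %u\n", (unsigned)rss_size);
            return 0;
    }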
@@@ -1658,7 -1610,7 +1658,7 @@@ static int hclge_alloc_vport(struct hcl num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
if (hdev->num_tqps < num_vport) { - dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)", + dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)", hdev->num_tqps, num_vport); return -EINVAL; } @@@ -1681,7 -1633,6 +1681,7 @@@ for (i = 0; i < num_vport; i++) { vport->back = hdev; vport->vport_id = i; + vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO; vport->mps = HCLGE_MAC_DEFAULT_FRAME; vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE; vport->rxvlan_cfg.rx_vlan_offload_en = true; @@@ -2334,8 -2285,7 +2334,8 @@@ static int hclge_init_msi(struct hclge_ int vectors; int i;
- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, + hdev->num_msi, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (vectors < 0) { dev_err(&pdev->dev, @@@ -2345,12 -2295,11 +2345,12 @@@ } if (vectors < hdev->num_msi) dev_warn(&hdev->pdev->dev, - "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n", + "requested %u MSI/MSI-X, but allocated %d MSI/MSI-X\n", hdev->num_msi, vectors);
hdev->num_msi = vectors; hdev->num_msi_left = vectors; + hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = hdev->base_msi_vector + hdev->roce_base_msix_offset; @@@ -2777,7 -2726,7 +2777,7 @@@ static void hclge_update_port_capabilit else if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER) mac->module_type = HNAE3_MODULE_TYPE_TP;
- if (mac->support_autoneg == true) { + if (mac->support_autoneg) { linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mac->supported); linkmode_copy(mac->advertising, mac->supported); } else { @@@ -2904,62 -2853,6 +2904,62 @@@ static int hclge_get_status(struct hnae return hdev->hw.mac.link; }
+static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf) +{ + if (pci_num_vf(hdev->pdev) == 0) { + dev_err(&hdev->pdev->dev, + "SRIOV is disabled, can not get vport(%d) info.\n", vf); + return NULL; + } + + if (vf < 0 || vf >= pci_num_vf(hdev->pdev)) { + dev_err(&hdev->pdev->dev, + "vf id(%d) is out of range(0 <= vfid < %d)\n", + vf, pci_num_vf(hdev->pdev)); + return NULL; + } + + /* VF start from 1 in vport */ + vf += HCLGE_VF_VPORT_START_NUM; + return &hdev->vport[vf]; +} + +static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, + struct ifla_vf_info *ivf) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + ivf->vf = vf; + ivf->linkstate = vport->vf_info.link_state; + ivf->spoofchk = vport->vf_info.spoofchk; + ivf->trusted = vport->vf_info.trusted; + ivf->min_tx_rate = 0; + ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ether_addr_copy(ivf->mac, vport->vf_info.mac); + + return 0; +} + +static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, + int link_state) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + vport->vf_info.link_state = link_state; + + return 0; +} + static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) { u32 rst_src_reg, cmdq_src_reg, msix_src_reg; @@@ -3280,7 -3173,7 +3280,7 @@@ static int hclge_reset_wait(struct hclg
if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) { dev_err(&hdev->pdev->dev, - "flr wait timeout: %d\n", cnt); + "flr wait timeout: %u\n", cnt); return -EBUSY; }
@@@ -3330,7 -3223,7 +3330,7 @@@ static int hclge_set_all_vf_rst(struct ret = hclge_set_vf_rst(hdev, vport->vport_id, reset); if (ret) { dev_err(&hdev->pdev->dev, - "set vf(%d) rst failed %d!\n", + "set vf(%u) rst failed %d!\n", vport->vport_id, ret); return ret; } @@@ -3345,7 -3238,7 +3345,7 @@@ ret = hclge_inform_reset_assert_to_vf(vport); if (ret) dev_warn(&hdev->pdev->dev, - "inform reset to vf(%d) failed %d!\n", + "inform reset to vf(%u) failed %d!\n", vport->vport_id, ret); }
@@@ -3658,7 -3551,7 +3658,7 @@@ static bool hclge_reset_err_handle(stru hdev->rst_stats.reset_fail_cnt++; set_bit(hdev->reset_type, &hdev->reset_pending); dev_info(&hdev->pdev->dev, - "re-schedule reset task(%d)\n", + "re-schedule reset task(%u)\n", hdev->rst_stats.reset_fail_cnt); return true; } @@@ -3669,9 -3562,6 +3669,9 @@@ hclge_reset_handshake(hdev, true);
dev_err(&hdev->pdev->dev, "Reset fail!\n"); + + hclge_dbg_dump_rst_info(hdev); + return false; }
@@@ -3679,28 -3569,12 +3679,28 @@@ static int hclge_set_rst_done(struct hc { struct hclge_pf_rst_done_cmd *req; struct hclge_desc desc; + int ret;
req = (struct hclge_pf_rst_done_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PF_RST_DONE, false); req->pf_rst_done |= HCLGE_PF_RESET_DONE_BIT;
- return hclge_cmd_send(&hdev->hw, &desc, 1); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + /* To be compatible with the old firmware, which does not support + * command HCLGE_OPC_PF_RST_DONE, just print a warning and + * return success + */ + if (ret == -EOPNOTSUPP) { + dev_warn(&hdev->pdev->dev, + "current firmware does not support command(0x%x)!\n", + HCLGE_OPC_PF_RST_DONE); + return 0; + } else if (ret) { + dev_err(&hdev->pdev->dev, "assert PF reset done fail %d!\n", + ret); + } + + return ret; }
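hclge_set_rst_done() now tolerates firmware that predates
HCLGE_OPC_PF_RST_DONE: "command not supported" is downgraded to success,
while any other error still propagates.  The policy in isolation, with a fake
command sender:

    #include <stdio.h>
    #include <errno.h>

    static int send_cmd(int simulated_status) { return simulated_status; }

    static int assert_reset_done(int simulated_status)
    {
            int ret = send_cmd(simulated_status);

            if (ret == -EOPNOTSUPP) {
                    fprintf(stderr, "firmware lacks the command, ignoring\n");
                    return 0;   /* compatible fallback */
            }
            return ret;         /* 0 on success, negative errno otherwise */
    }

    int main(void)
    {
            printf("old firmware: %d\n", assert_reset_done(-EOPNOTSUPP));
            printf("new firmware: %d\n", assert_reset_done(0));
            return 0;
    }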
static int hclge_reset_prepare_up(struct hclge_dev *hdev) @@@ -3871,13 -3745,12 +3871,13 @@@ static void hclge_reset_event(struct pc HCLGE_RESET_INTERVAL))) { mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); return; - } else if (hdev->default_reset_request) + } else if (hdev->default_reset_request) { hdev->reset_level = hclge_get_reset_level(ae_dev, &hdev->default_reset_request); - else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) + } else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) { hdev->reset_level = HNAE3_FUNC_RESET; + }
dev_info(&hdev->pdev->dev, "received reset event, reset type is %d\n", hdev->reset_level); @@@ -4002,7 -3875,6 +4002,7 @@@ static void hclge_service_task(struct w hclge_update_link_status(hdev); hclge_update_vport_alive(hdev); hclge_sync_vlan_filter(hdev); + if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { hclge_rfs_filter_expire(hdev); hdev->fd_arfs_expire_timer = 0; @@@ -4031,7 -3903,6 +4031,7 @@@ static int hclge_get_vector(struct hnae int alloc = 0; int i, j;
+ vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num); vector_num = min(hdev->num_msi_left, vector_num);
for (j = 0; j < vector_num; j++) { @@@ -4509,7 -4380,7 +4509,7 @@@ int hclge_rss_init_hw(struct hclge_dev */ if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) { dev_err(&hdev->pdev->dev, - "Configure rss tc size failed, invalid TC_SIZE = %d\n", + "Configure rss tc size failed, invalid TC_SIZE = %u\n", rss_size); return -EINVAL; } @@@ -4687,8 -4558,8 +4687,8 @@@ static int hclge_unmap_ring_frm_vector( return ret; }
-int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, - struct hclge_promisc_param *param) +static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, + struct hclge_promisc_param *param) { struct hclge_promisc_cfg_cmd *req; struct hclge_desc desc; @@@ -4715,9 -4586,8 +4715,9 @@@ return ret; }
-void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, - bool en_mc, bool en_bc, int vport_id) +static void hclge_promisc_param_init(struct hclge_promisc_param *param, + bool en_uc, bool en_mc, bool en_bc, + int vport_id) { if (!param) return; @@@ -4732,21 -4602,12 +4732,21 @@@ param->vf_id = vport_id; }
+int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc, + bool en_mc_pmc, bool en_bc_pmc) +{ + struct hclge_dev *hdev = vport->back; + struct hclge_promisc_param param; + + hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, + vport->vport_id); + return hclge_cmd_set_promisc_mode(hdev, ¶m); +} + static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, bool en_mc_pmc) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - struct hclge_promisc_param param; bool en_bc_pmc = true;
/* For revision 0x20, if broadcast promisc enabled, vlan filter is @@@ -4756,8 -4617,9 +4756,8 @@@ if (handle->pdev->revision == 0x20) en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false;
- hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, - vport->vport_id); - return hclge_cmd_set_promisc_mode(hdev, ¶m); + return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc, + en_bc_pmc); }
static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) @@@ -4859,7 -4721,7 +4859,7 @@@ static int hclge_init_fd_config(struct break; default: dev_err(&hdev->pdev->dev, - "Unsupported flow director mode %d\n", + "Unsupported flow director mode %u\n", hdev->fd_cfg.fd_mode); return -EOPNOTSUPP; } @@@ -5189,7 -5051,7 +5189,7 @@@ static int hclge_config_key(struct hclg true); if (ret) { dev_err(&hdev->pdev->dev, - "fd key_y config fail, loc=%d, ret=%d\n", + "fd key_y config fail, loc=%u, ret=%d\n", rule->queue_id, ret); return ret; } @@@ -5198,7 -5060,7 +5198,7 @@@ true); if (ret) dev_err(&hdev->pdev->dev, - "fd key_x config fail, loc=%d, ret=%d\n", + "fd key_x config fail, loc=%u, ret=%d\n", rule->queue_id, ret); return ret; } @@@ -5447,7 -5309,7 +5447,7 @@@ static int hclge_fd_update_rule_list(st } } else if (!is_add) { dev_err(&hdev->pdev->dev, - "delete fail, rule %d is inexistent\n", + "delete fail, rule %u is inexistent\n", location); return -EINVAL; } @@@ -5687,7 -5549,7 +5687,7 @@@ static int hclge_add_fd_entry(struct hn
if (vf > hdev->num_req_vfs) { dev_err(&hdev->pdev->dev, - "Error: vf id (%d) > max vf num (%d)\n", + "Error: vf id (%u) > max vf num (%u)\n", vf, hdev->num_req_vfs); return -EINVAL; } @@@ -5697,7 -5559,7 +5697,7 @@@
if (ring >= tqps) { dev_err(&hdev->pdev->dev, - "Error: queue id (%d) > max tqp num (%d)\n", + "Error: queue id (%u) > max tqp num (%u)\n", ring, tqps - 1); return -EINVAL; } @@@ -5756,7 -5618,7 +5756,7 @@@ static int hclge_del_fd_entry(struct hn
if (!hclge_fd_rule_exist(hdev, fs->location)) {
dev_err(&hdev->pdev->dev,
- "Delete fail, rule %d is inexistent\n", fs->location);
+ "Delete fail, rule %u does not exist\n", fs->location);
return -ENOENT;
}
@@@ -5833,7 -5695,7 +5833,7 @@@ static int hclge_restore_fd_entries(str
if (ret) { dev_warn(&hdev->pdev->dev, - "Restore rule %d failed, remove it\n", + "Restore rule %u failed, remove it\n", rule->location); clear_bit(rule->location, hdev->fd_bmap); hlist_del(&rule->rule_node); @@@ -6366,23 -6228,11 +6366,23 @@@ static int hclge_config_switch_param(st
func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0); req = (struct hclge_mac_vlan_switch_cmd *)desc.data; + + /* read current config parameter */ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM, - false); + true); req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL; req->func_id = cpu_to_le32(func_id); - req->switch_param = switch_param; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "read mac vlan switch parameter fail, ret = %d\n", ret); + return ret; + } + + /* modify and write new config parameter */ + hclge_cmd_reuse_desc(&desc, false); + req->switch_param = (req->switch_param & param_mask) | switch_param; req->param_mask = param_mask;
ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@@ -6838,7 -6688,7 +6838,7 @@@ static int hclge_get_mac_vlan_cmd_statu
if (cmdq_resp) { dev_err(&hdev->pdev->dev, - "cmdq execute failed for get_mac_vlan_cmd_status,status=%d.\n", + "cmdq execute failed for get_mac_vlan_cmd_status,status=%u.\n", cmdq_resp); return -EIO; } @@@ -7090,7 -6940,7 +7090,7 @@@ static int hclge_init_umv_space(struct
if (allocated_size < hdev->wanted_umv_size) dev_warn(&hdev->pdev->dev, - "Alloc umv space failed, want %d, get %d\n", + "Alloc umv space failed, want %u, get %u\n", hdev->wanted_umv_size, allocated_size);
mutex_init(&hdev->umv_mutex); @@@ -7258,7 -7108,7 +7258,7 @@@ int hclge_add_uc_addr_common(struct hcl
/* check if we just hit the duplicate */ if (!ret) { - dev_warn(&hdev->pdev->dev, "VF %d mac(%pM) exists\n", + dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n", vport->vport_id, addr); return 0; } @@@ -7439,7 -7289,7 +7439,7 @@@ void hclge_rm_vport_mac_table(struct hc mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC;
list_for_each_entry_safe(mac_cfg, tmp, list, node) { - if (strncmp(mac_cfg->mac_addr, mac_addr, ETH_ALEN) == 0) { + if (ether_addr_equal(mac_cfg->mac_addr, mac_addr)) { if (uc_flag && mac_cfg->hd_tbl_status) hclge_rm_uc_addr_common(vport, mac_addr);
@@@ -7511,7 -7361,7 +7511,7 @@@ static int hclge_get_mac_ethertype_cmd_
if (cmdq_resp) { dev_err(&hdev->pdev->dev, - "cmdq execute failed for get_mac_ethertype_cmd_status, status=%d.\n", + "cmdq execute failed for get_mac_ethertype_cmd_status, status=%u.\n", cmdq_resp); return -EIO; } @@@ -7533,7 -7383,7 +7533,7 @@@ break; default: dev_err(&hdev->pdev->dev, - "add mac ethertype failed for undefined, code=%d.\n", + "add mac ethertype failed for undefined, code=%u.\n", resp_code); return_status = -EIO; } @@@ -7541,67 -7391,6 +7541,67 @@@ return return_status; }
+static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx, + u8 *mac_addr) +{ + struct hclge_mac_vlan_tbl_entry_cmd req; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u16 egress_port = 0; + int i; + + if (is_zero_ether_addr(mac_addr)) + return false; + + memset(&req, 0, sizeof(req)); + hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M, + HCLGE_MAC_EPORT_VFID_S, vport->vport_id); + req.egress_port = cpu_to_le16(egress_port); + hclge_prepare_mac_addr(&req, mac_addr, false); + + if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT) + return true; + + vf_idx += HCLGE_VF_VPORT_START_NUM; + for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + if (i != vf_idx && + ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) + return true; + + return false; +} + +static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, + u8 *mac_addr) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (ether_addr_equal(mac_addr, vport->vf_info.mac)) { + dev_info(&hdev->pdev->dev, + "Specified MAC(=%pM) is same as before, no change committed!\n", + mac_addr); + return 0; + } + + if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) { + dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n", + mac_addr); + return -EEXIST; + } + + ether_addr_copy(vport->vf_info.mac, mac_addr); + dev_info(&hdev->pdev->dev, + "MAC of VF %d has been set to %pM, and it will be reinitialized!\n", + vf, mac_addr); + + return hclge_inform_reset_assert_to_vf(vport); +} + static int hclge_add_mgr_tbl(struct hclge_dev *hdev, const struct hclge_mac_mgr_tbl_entry_cmd *req) { @@@ -7774,7 -7563,7 +7774,7 @@@ static int hclge_set_vf_vlan_common(str bool is_kill, u16 vlan, __be16 proto) { -#define HCLGE_MAX_VF_BYTES 16 + struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_vlan_filter_vf_cfg_cmd *req0; struct hclge_vlan_filter_vf_cfg_cmd *req1; struct hclge_desc desc[2]; @@@ -7783,18 -7572,10 +7783,18 @@@ int ret;
/* if vf vlan table is full, firmware will close vf vlan filter, it
- * is unable and unnecessary to add new vlan id to vf vlan filter
+ * is unable and unnecessary to add new vlan id to vf vlan filter.
+ * If spoof check is enabled and the vf vlan table is full, a new
+ * vlan must not be added, because tx packets carrying these vlan
+ * ids would be dropped.
*/
- if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill)
+ if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) {
+ if (vport->vf_info.spoofchk && vlan) {
+ dev_err(&hdev->pdev->dev,
+ "Can't add vlan: spoof check is on and vf vlan table is full\n");
+ return -EPERM;
+ }
return 0;
+ }
hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_VLAN_FILTER_VF_CFG, false); @@@ -7838,7 -7619,7 +7838,7 @@@ }
dev_err(&hdev->pdev->dev, - "Add vf vlan filter fail, ret =%d.\n", + "Add vf vlan filter fail, ret =%u.\n", req0->resp_code); } else { #define HCLGE_VF_VLAN_DEL_NO_FOUND 1 @@@ -7854,7 -7635,7 +7854,7 @@@ return 0;
dev_err(&hdev->pdev->dev, - "Kill vf vlan filter fail, ret =%d.\n", + "Kill vf vlan filter fail, ret =%u.\n", req0->resp_code); }
@@@ -7873,10 -7654,9 +7873,10 @@@ static int hclge_set_port_vlan_filter(s
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_PF_CFG, false);
- vlan_offset_160 = vlan_id / 160; - vlan_offset_byte = (vlan_id % 160) / 8; - vlan_offset_byte_val = 1 << (vlan_id % 8); + vlan_offset_160 = vlan_id / HCLGE_VLAN_ID_OFFSET_STEP; + vlan_offset_byte = (vlan_id % HCLGE_VLAN_ID_OFFSET_STEP) / + HCLGE_VLAN_BYTE_SIZE; + vlan_offset_byte_val = 1 << (vlan_id % HCLGE_VLAN_BYTE_SIZE);
req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data; req->vlan_offset = vlan_offset_160; @@@ -7904,7 -7684,7 +7904,7 @@@ static int hclge_set_vlan_filter_hw(str proto); if (ret) { dev_err(&hdev->pdev->dev, - "Set %d vport vlan filter config fail, ret =%d.\n", + "Set %u vport vlan filter config fail, ret =%d.\n", vport_id, ret); return ret; } @@@ -7916,7 -7696,7 +7916,7 @@@
if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) { dev_err(&hdev->pdev->dev, - "Add port vlan failed, vport %d is already in vlan %d\n", + "Add port vlan failed, vport %u is already in vlan %u\n", vport_id, vlan_id); return -EINVAL; } @@@ -7924,7 -7704,7 +7924,7 @@@ if (is_kill && !test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) { dev_err(&hdev->pdev->dev, - "Delete port vlan failed, vport %d is not in vlan %d\n", + "Delete port vlan failed, vport %u is not in vlan %u\n", vport_id, vlan_id); return -EINVAL; } @@@ -8292,15 -8072,12 +8292,15 @@@ static void hclge_restore_vlan_table(st }
list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { - if (vlan->hd_tbl_status) - hclge_set_vlan_filter_hw(hdev, - htons(ETH_P_8021Q), - vport->vport_id, - vlan->vlan_id, - false); + int ret; + + if (!vlan->hd_tbl_status) + continue; + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, false); + if (ret) + break; } }
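The HCLGE_VLAN_ID_OFFSET_STEP / HCLGE_VLAN_BYTE_SIZE arithmetic in hclge_set_port_vlan_filter() above addresses one bit per VLAN ID inside 160-entry blocks. A worked standalone example (constants restated here, not taken from the driver headers):

    #include <stdint.h>
    #include <stdio.h>

    #define VLAN_ID_OFFSET_STEP 160  /* VLAN IDs covered per descriptor */
    #define VLAN_BYTE_SIZE      8    /* VLAN bits held in one bitmap byte */

    int main(void)
    {
        unsigned int vlan_id = 2012;  /* arbitrary example ID */
        unsigned int block = vlan_id / VLAN_ID_OFFSET_STEP;
        unsigned int byte = (vlan_id % VLAN_ID_OFFSET_STEP) / VLAN_BYTE_SIZE;
        unsigned int bit = 1u << (vlan_id % VLAN_BYTE_SIZE);

        /* prints: block 12, byte 11, bit 0x10 */
        printf("block %u, byte %u, bit 0x%02x\n", block, byte, bit);
        return 0;
    }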
@@@ -8438,16 -8215,13 +8438,16 @@@ static int hclge_set_vf_vlan_filter(str if (hdev->pdev->revision == 0x20) return -EOPNOTSUPP;
+ vport = hclge_get_vf_vport(hdev, vfid); + if (!vport) + return -EINVAL; + /* qos is a 3 bits value, so can not be bigger than 7 */ - if (vfid >= hdev->num_alloc_vfs || vlan > VLAN_N_VID - 1 || qos > 7) + if (vlan > VLAN_N_VID - 1 || qos > 7) return -EINVAL; if (proto != htons(ETH_P_8021Q)) return -EPROTONOSUPPORT;
- vport = &hdev->vport[vfid]; state = hclge_get_port_base_vlan_state(vport, vport->port_base_vlan_cfg.state, vlan); @@@ -8458,12 -8232,21 +8458,12 @@@ vlan_info.qos = qos; vlan_info.vlan_proto = ntohs(proto);
- /* update port based VLAN for PF */ - if (!vfid) { - hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); - ret = hclge_update_port_base_vlan_cfg(vport, state, &vlan_info); - hclge_notify_client(hdev, HNAE3_UP_CLIENT); - - return ret; - } - if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { return hclge_update_port_base_vlan_cfg(vport, state, &vlan_info); } else { ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0], - (u8)vfid, state, + vport->vport_id, state, vlan, qos, ntohs(proto)); return ret; @@@ -8574,7 -8357,6 +8574,7 @@@ int hclge_set_vport_mtu(struct hclge_vp struct hclge_dev *hdev = vport->back; int i, max_frm_size, ret;
+ /* HW support 2 layer vlan */
max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
max_frm_size > HCLGE_MAC_MAX_FRAME)
@@@ -8990,16 -8772,16 +8990,16 @@@ static void hclge_info_show(struct hclg
dev_info(dev, "PF info begin:\n");
- dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps); - dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc); - dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc); - dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport); - dev_info(dev, "Numbers of vmdp vports: %d\n", hdev->num_vmdq_vport); - dev_info(dev, "Numbers of VF for this PF: %d\n", hdev->num_req_vfs); - dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map); - dev_info(dev, "Total buffer size for TX/RX: %d\n", hdev->pkt_buf_size); - dev_info(dev, "TX buffer size for each TC: %d\n", hdev->tx_buf_size); - dev_info(dev, "DV buffer size for each TC: %d\n", hdev->dv_buf_size); + dev_info(dev, "Task queue pairs numbers: %u\n", hdev->num_tqps); + dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc); + dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc); + dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport); + dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport); + dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs); + dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map); + dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size); + dev_info(dev, "TX buffer size for each TC: %u\n", hdev->tx_buf_size); + dev_info(dev, "DV buffer size for each TC: %u\n", hdev->dv_buf_size); dev_info(dev, "This is %s PF\n", hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main"); dev_info(dev, "DCB %s\n", @@@ -9015,9 -8797,10 +9015,9 @@@ static int hclge_init_nic_client_instan { struct hnae3_client *client = vport->nic.client; struct hclge_dev *hdev = ae_dev->priv; - int rst_cnt; + int rst_cnt = hdev->rst_stats.reset_cnt; int ret;
- rst_cnt = hdev->rst_stats.reset_cnt; ret = client->ops->init_instance(&vport->nic); if (ret) return ret; @@@ -9117,6 -8900,7 +9117,6 @@@ static int hclge_init_client_instance(s
switch (client->type) {
case HNAE3_CLIENT_KNIC:
- hdev->nic_client = client;
vport->nic.client = client;
ret = hclge_init_nic_client_instance(ae_dev, vport);
@@@ -9315,7 -9099,7 +9315,7 @@@ static void hclge_clear_resetting_state
ret = hclge_set_vf_rst(hdev, vport->vport_id, false);
if (ret)
dev_warn(&hdev->pdev->dev,
- "clear vf(%d) rst failed %d!\n",
+ "clear vf(%u) rst failed %d!\n",
vport->vport_id, ret);
}
}
@@@ -9337,8 -9121,6 +9337,8 @@@ static int hclge_init_ae_dev(struct hna
hdev->reset_type = HNAE3_NONE_RESET;
hdev->reset_level = HNAE3_FUNC_RESET;
ae_dev->priv = hdev;
+
+ /* HW support 2 layer vlan */
hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
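For reference, with the standard header sizes (ETH_FRAME_LEN = 1514, ETH_FCS_LEN = 4, VLAN_HLEN = 4) the default mps set above works out to 1514 + 4 + 2 * 4 = 1526 bytes: a maximum Ethernet frame plus FCS plus two stacked VLAN tags.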
mutex_init(&hdev->vport_lock); @@@ -9537,219 -9319,6 +9537,219 @@@ static void hclge_stats_clear(struct hc memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); }
+static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) +{ + return hclge_config_switch_param(hdev, vf, enable, + HCLGE_SWITCH_ANTI_SPOOF_MASK); +} + +static int hclge_set_vlan_spoofchk(struct hclge_dev *hdev, int vf, bool enable) +{ + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, + HCLGE_FILTER_FE_NIC_INGRESS_B, + enable, vf); +} + +static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable) +{ + int ret; + + ret = hclge_set_mac_spoofchk(hdev, vf, enable); + if (ret) { + dev_err(&hdev->pdev->dev, + "Set vf %d mac spoof check %s failed, ret=%d\n", + vf, enable ? "on" : "off", ret); + return ret; + } + + ret = hclge_set_vlan_spoofchk(hdev, vf, enable); + if (ret) + dev_err(&hdev->pdev->dev, + "Set vf %d vlan spoof check %s failed, ret=%d\n", + vf, enable ? "on" : "off", ret); + + return ret; +} + +static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf, + bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 new_spoofchk = enable ? 1 : 0; + int ret; + + if (hdev->pdev->revision == 0x20) + return -EOPNOTSUPP; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (vport->vf_info.spoofchk == new_spoofchk) + return 0; + + if (enable && test_bit(vport->vport_id, hdev->vf_vlan_full)) + dev_warn(&hdev->pdev->dev, + "vf %d vlan table is full, enable spoof check may cause its packet send fail\n", + vf); + else if (enable && hclge_is_umv_space_full(vport)) + dev_warn(&hdev->pdev->dev, + "vf %d mac table is full, enable spoof check may cause its packet send fail\n", + vf); + + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, enable); + if (ret) + return ret; + + vport->vf_info.spoofchk = new_spoofchk; + return 0; +} + +static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + int ret; + int i; + + if (hdev->pdev->revision == 0x20) + return 0; + + /* resume the vf spoof check state after reset */ + for (i = 0; i < hdev->num_alloc_vport; i++) { + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, + vport->vf_info.spoofchk); + if (ret) + return ret; + + vport++; + } + + return 0; +} + +static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 new_trusted = enable ? 1 : 0; + bool en_bc_pmc; + int ret; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (vport->vf_info.trusted == new_trusted) + return 0; + + /* Disable promisc mode for VF if it is not trusted any more. 
*/ + if (!enable && vport->vf_info.promisc_enable) { + en_bc_pmc = hdev->pdev->revision != 0x20; + ret = hclge_set_vport_promisc_mode(vport, false, false, + en_bc_pmc); + if (ret) + return ret; + vport->vf_info.promisc_enable = 0; + hclge_inform_vf_promisc_info(vport); + } + + vport->vf_info.trusted = new_trusted; + + return 0; +} + +static void hclge_reset_vf_rate(struct hclge_dev *hdev) +{ + int ret; + int vf; + + /* reset vf rate to default value */ + for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) { + struct hclge_vport *vport = &hdev->vport[vf]; + + vport->vf_info.max_tx_rate = 0; + ret = hclge_tm_qs_shaper_cfg(vport, vport->vf_info.max_tx_rate); + if (ret) + dev_err(&hdev->pdev->dev, + "vf%d failed to reset to default, ret=%d\n", + vf - HCLGE_VF_VPORT_START_NUM, ret); + } +} + +static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf, + int min_tx_rate, int max_tx_rate) +{ + if (min_tx_rate != 0 || + max_tx_rate < 0 || max_tx_rate > hdev->hw.mac.max_speed) { + dev_err(&hdev->pdev->dev, + "min_tx_rate:%d [0], max_tx_rate:%d [0, %u]\n", + min_tx_rate, max_tx_rate, hdev->hw.mac.max_speed); + return -EINVAL; + } + + return 0; +} + +static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf, + int min_tx_rate, int max_tx_rate, bool force) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate); + if (ret) + return ret; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (!force && max_tx_rate == vport->vf_info.max_tx_rate) + return 0; + + ret = hclge_tm_qs_shaper_cfg(vport, max_tx_rate); + if (ret) + return ret; + + vport->vf_info.max_tx_rate = max_tx_rate; + + return 0; +} + +static int hclge_resume_vf_rate(struct hclge_dev *hdev) +{ + struct hnae3_handle *handle = &hdev->vport->nic; + struct hclge_vport *vport; + int ret; + int vf; + + /* resume the vf max_tx_rate after reset */ + for (vf = 0; vf < pci_num_vf(hdev->pdev); vf++) { + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + /* zero means max rate, after reset, firmware already set it to + * max rate, so just continue. + */ + if (!vport->vf_info.max_tx_rate) + continue; + + ret = hclge_set_vf_rate(handle, vf, 0, + vport->vf_info.max_tx_rate, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "vf%d failed to resume tx_rate:%u, ret=%d\n", + vf, vport->vf_info.max_tx_rate, ret); + return ret; + } + } + + return 0; +} + static void hclge_reset_vport_state(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; @@@ -9827,9 -9396,6 +9827,9 @@@ static int hclge_reset_ae_dev(struct hn return ret; }
+ /* Log and clear the hw errors those already occurred */ + hclge_handle_all_hns_hw_errors(ae_dev); + /* Re-enable the hw error interrupts because * the interrupts get disabled on global reset. */ @@@ -9852,13 -9418,6 +9852,13 @@@ }
hclge_reset_vport_state(hdev); + ret = hclge_reset_vport_spoofchk(hdev); + if (ret) + return ret; + + ret = hclge_resume_vf_rate(hdev); + if (ret) + return ret;
dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", HCLGE_DRIVER_NAME); @@@ -9871,7 -9430,6 +9871,7 @@@ static void hclge_uninit_ae_dev(struct struct hclge_dev *hdev = ae_dev->priv; struct hclge_mac *mac = &hdev->hw.mac;
+ hclge_reset_vf_rate(hdev); hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev);
@@@ -9936,8 -9494,8 +9936,8 @@@ static int hclge_set_channels(struct hn u16 tc_offset[HCLGE_MAX_TC_NUM] = {0}; struct hclge_dev *hdev = vport->back; u16 tc_size[HCLGE_MAX_TC_NUM] = {0}; - int cur_rss_size = kinfo->rss_size; - int cur_tqps = kinfo->num_tqps; + u16 cur_rss_size = kinfo->rss_size; + u16 cur_tqps = kinfo->num_tqps; u16 tc_valid[HCLGE_MAX_TC_NUM]; u16 roundup_size; u32 *rss_indir; @@@ -9991,7 -9549,7 +9991,7 @@@ out: if (!ret) dev_info(&hdev->pdev->dev, - "Channels changed, rss_size from %d to %d, tqps from %d to %d", + "Channels changed, rss_size from %u to %u, tqps from %u to %u", cur_rss_size, kinfo->rss_size, cur_tqps, kinfo->rss_size * kinfo->num_tc);
@@@ -10240,7 -9798,7 +10240,7 @@@ static int hclge_get_dfx_reg_len(struc return ret; }
- data_len_per_desc = FIELD_SIZEOF(struct hclge_desc, data); + data_len_per_desc = sizeof_member(struct hclge_desc, data); *len = 0; for (i = 0; i < dfx_reg_type_num; i++) { bd_num = bd_num_list[i]; @@@ -10594,12 -10152,6 +10594,12 @@@ static const struct hnae3_ae_ops hclge_ .mac_connect_phy = hclge_mac_connect_phy, .mac_disconnect_phy = hclge_mac_disconnect_phy, .restore_vlan_table = hclge_restore_vlan_table, + .get_vf_config = hclge_get_vf_config, + .set_vf_link_state = hclge_set_vf_link_state, + .set_vf_spoofchk = hclge_set_vf_spoofchk, + .set_vf_trust = hclge_set_vf_trust, + .set_vf_rate = hclge_set_vf_rate, + .set_vf_mac = hclge_set_vf_mac, };
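The new .set_vf_rate path registered above supports only a per-VF maximum rate, which is why hclge_vf_rate_param_check() rejects any nonzero min_tx_rate. A standalone restatement of that check (not the driver code):

    #include <errno.h>

    static int vf_rate_check(int min_tx_rate, int max_tx_rate, int max_speed)
    {
        if (min_tx_rate != 0)
            return -EINVAL;  /* a minimum rate is not supported */
        if (max_tx_rate < 0 || max_tx_rate > max_speed)
            return -EINVAL;  /* valid range: 0 (unlimited) .. link speed */
        return 0;
    }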
static struct hnae3_ae_algo ae_algo = { diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index fbc39a2480d0,a0f08f94b12b..a1fe95b1a476 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@@ -46,7 -46,7 +46,7 @@@ static int hclge_shaper_para_calc(u32 i #define DIVISOR_CLK (1000 * 8) #define DIVISOR_IR_B_126 (126 * DIVISOR_CLK)
- const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
+ static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
6 * 256, /* Priority level */
6 * 32, /* Priority group level */
6 * 8, /* Port level */
@@@ -511,49 -511,6 +511,49 @@@ static int hclge_tm_qs_bp_cfg(struct hc
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate) +{ + struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + struct hclge_qs_shapping_cmd *shap_cfg_cmd; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u8 ir_b, ir_u, ir_s; + u32 shaper_para; + int ret, i; + + if (!max_tx_rate) + max_tx_rate = HCLGE_ETHER_MAX_RATE; + + ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET, + &ir_b, &ir_u, &ir_s); + if (ret) + return ret; + + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + + for (i = 0; i < kinfo->num_tc; i++) { + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, + false); + + shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data; + shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i); + shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "vf%u, qs%u failed to set tx_rate:%d, ret=%d\n", + vport->vport_id, shap_cfg_cmd->qs_id, + max_tx_rate, ret); + return ret; + } + } + + return 0; +} + static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; @@@ -575,21 -532,14 +575,21 @@@ /* Set to user value, no larger than max_rss_size. */ if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size && kinfo->req_rss_size <= max_rss_size) { - dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", + dev_info(&hdev->pdev->dev, "rss changes from %u to %u\n", kinfo->rss_size, kinfo->req_rss_size); kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { + /* if user not set rss, the rss_size should compare with the + * valid msi numbers to ensure one to one map between tqp and + * irq as default. + */ + if (!kinfo->req_rss_size) + max_rss_size = min_t(u16, max_rss_size, + (hdev->num_nic_msi - 1) / + kinfo->num_tc); + /* Set to the maximum specification value (max_rss_size). */ - dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", - kinfo->rss_size, max_rss_size); kinfo->rss_size = max_rss_size; }
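The new branch above ties the default rss_size to the interrupt budget: one vector stays reserved, and the rest are split across TCs so every queue pair can own its own vector. With num_nic_msi = 16 and num_tc = 4, for example, at most (16 - 1) / 4 = 3 queue pairs per TC. A minimal sketch of the clamp (names illustrative):

    #include <stdint.h>

    static uint16_t default_rss_size(uint16_t max_rss, uint16_t num_nic_msi,
                                     uint16_t num_tc)
    {
        /* one MSI vector is reserved; the remainder is shared by all TCs */
        uint16_t by_msi = (num_nic_msi - 1) / num_tc;

        return max_rss < by_msi ? max_rss : by_msi;
    }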
@@@ -614,7 -564,7 +614,7 @@@ }
memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc, - FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc)); + sizeof_member(struct hnae3_knic_private_info, prio_tc)); }
static void hclge_tm_vport_info_update(struct hclge_dev *hdev) diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c index d24d8731bef0,8b0c60d754e1..990e29ff7ac4 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@@ -43,7 -43,7 +43,7 @@@ struct i40e_stats */ #define I40E_STAT(_type, _name, _stat) { \ .stat_string = _name, \ - .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ + .sizeof_stat = sizeof_member(_type, _stat), \ .stat_offset = offsetof(_type, _stat) \ }
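The FIELD_SIZEOF -> sizeof_member rename above is mechanical; the macro measures a struct member through a NULL-based access that is never dereferenced. A self-contained illustration of the stats-table idiom it supports (toy struct, not the driver's):

    #include <stddef.h>
    #include <stdint.h>

    #define sizeof_member(T, m) (sizeof(((T *)0)->m))

    struct toy_stats { uint64_t rx_packets; uint32_t tx_errors; };

    struct stat_desc { const char *name; size_t size; size_t offset; };

    #define TOY_STAT(m) \
        { #m, sizeof_member(struct toy_stats, m), \
          offsetof(struct toy_stats, m) }

    /* A generic dumper can walk any struct from this description. */
    static const struct stat_desc toy_descs[] = {
        TOY_STAT(rx_packets),
        TOY_STAT(tx_errors),
    };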
@@@ -722,14 -722,7 +722,14 @@@ static void i40e_get_settings_link_up_f ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
- if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) { + if ((I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) && + (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info)) { + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + } else if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) { ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); } else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) { ethtool_link_ksettings_add_link_mode(ks, advertising, @@@ -737,6 -730,12 +737,6 @@@ } else { ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); - if (I40E_AQ_SET_FEC_AUTO & req_fec_info) { - ethtool_link_ksettings_add_link_mode(ks, advertising, - FEC_RS); - ethtool_link_ksettings_add_link_mode(ks, advertising, - FEC_BASER); - } } }
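The fix above adds the missing case where both RS and KR request bits are set: the link then advertises all three choices instead of RS alone. Restated as a standalone decision function (the bit values are invented for the example):

    #include <stdbool.h>
    #include <stdint.h>

    #define REQ_RS 0x1  /* illustrative request bits */
    #define REQ_KR 0x2

    static void fec_advertise(uint8_t req, bool *none, bool *baser, bool *rs)
    {
        *none = *baser = *rs = false;
        if ((req & REQ_RS) && (req & REQ_KR))
            *none = *baser = *rs = true;  /* both requested: offer all */
        else if (req & REQ_RS)
            *rs = true;
        else if (req & REQ_KR)
            *baser = true;
        else
            *none = true;  /* nothing requested */
    }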
@@@ -1438,7 -1437,6 +1438,7 @@@ static int i40e_get_fec_param(struct ne struct i40e_hw *hw = &pf->hw; i40e_status status = 0; int err = 0; + u8 fec_cfg;
/* Get the current phy config */ memset(&abilities, 0, sizeof(abilities)); @@@ -1450,16 -1448,18 +1450,16 @@@ }
fecparam->fec = 0; - if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO) + fec_cfg = abilities.fec_cfg_curr_mod_ext_info; + if (fec_cfg & I40E_AQ_SET_FEC_AUTO) fecparam->fec |= ETHTOOL_FEC_AUTO; - if ((abilities.fec_cfg_curr_mod_ext_info & - I40E_AQ_SET_FEC_REQUEST_RS) || - (abilities.fec_cfg_curr_mod_ext_info & - I40E_AQ_SET_FEC_ABILITY_RS)) + else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS | + I40E_AQ_SET_FEC_ABILITY_RS)) fecparam->fec |= ETHTOOL_FEC_RS; - if ((abilities.fec_cfg_curr_mod_ext_info & - I40E_AQ_SET_FEC_REQUEST_KR) || - (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR)) + else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR | + I40E_AQ_SET_FEC_ABILITY_KR)) fecparam->fec |= ETHTOOL_FEC_BASER; - if (abilities.fec_cfg_curr_mod_ext_info == 0) + if (fec_cfg == 0) fecparam->fec |= ETHTOOL_FEC_OFF;
if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) @@@ -5112,7 -5112,7 +5112,7 @@@ static int i40e_get_module_info(struct case I40E_MODULE_TYPE_SFP: status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - I40E_I2C_EEPROM_DEV_ADDR, + I40E_I2C_EEPROM_DEV_ADDR, true, I40E_MODULE_SFF_8472_COMP, &sff8472_comp, NULL); if (status) @@@ -5120,7 -5120,7 +5120,7 @@@
status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - I40E_I2C_EEPROM_DEV_ADDR, + I40E_I2C_EEPROM_DEV_ADDR, true, I40E_MODULE_SFF_8472_SWAP, &sff8472_swap, NULL); if (status) @@@ -5152,7 -5152,7 +5152,7 @@@ /* Read from memory page 0. */ status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - 0, + 0, true, I40E_MODULE_REVISION_ADDR, &sff8636_rev, NULL); if (status) @@@ -5223,7 -5223,7 +5223,7 @@@ static int i40e_get_module_eeprom(struc
status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - addr, offset, &value, NULL); + true, addr, offset, &value, NULL); if (status) return -EIO; data[i] = value; @@@ -5242,7 -5242,6 +5242,7 @@@ static int i40e_set_eee(struct net_devi }
static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = { + .get_drvinfo = i40e_get_drvinfo, .set_eeprom = i40e_set_eeprom, .get_eeprom_len = i40e_get_eeprom_len, .get_eeprom = i40e_get_eeprom, diff --combined drivers/net/ethernet/intel/ice/ice_ethtool.c index aec3c6c379df,cde113e7de41..fbbb7c826e66 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@@ -15,7 -15,7 +15,7 @@@ struct ice_stats
#define ICE_STAT(_type, _name, _stat) { \ .stat_string = _name, \ - .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ + .sizeof_stat = sizeof_member(_type, _stat), \ .stat_offset = offsetof(_type, _stat) \ }
@@@ -36,10 -36,10 +36,10 @@@ static int ice_q_stats_len(struct net_d #define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
#define ICE_PFC_STATS_LEN ( \ - (FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_rx) + \ - FIELD_SIZEOF(struct ice_pf, stats.priority_xon_rx) + \ - FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_tx) + \ - FIELD_SIZEOF(struct ice_pf, stats.priority_xon_tx)) \ + (sizeof_member(struct ice_pf, stats.priority_xoff_rx) + \ + sizeof_member(struct ice_pf, stats.priority_xon_rx) + \ + sizeof_member(struct ice_pf, stats.priority_xoff_tx) + \ + sizeof_member(struct ice_pf, stats.priority_xon_tx)) \ / sizeof(u64)) #define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \ ICE_VSI_STATS_LEN + ice_q_stats_len(n)) @@@ -156,7 -156,6 +156,7 @@@ struct ice_priv_flag static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT), + ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), };
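ICE_PFC_STATS_LEN above counts u64 slots, not struct members. Assuming each priority_x* field is a u64[8] array (one counter per priority, which is how ice lays these out), the macro evaluates to (64 + 64 + 64 + 64) / 8 = 32 stat entries. A compilable check of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    struct pf_stats {  /* assumed layout for the example */
        uint64_t priority_xoff_rx[8];
        uint64_t priority_xon_rx[8];
        uint64_t priority_xoff_tx[8];
        uint64_t priority_xon_tx[8];
    };

    #define sizeof_member(T, m) (sizeof(((T *)0)->m))

    int main(void)
    {
        size_t n = (sizeof_member(struct pf_stats, priority_xoff_rx) +
                    sizeof_member(struct pf_stats, priority_xon_rx) +
                    sizeof_member(struct pf_stats, priority_xoff_tx) +
                    sizeof_member(struct pf_stats, priority_xon_tx)) /
                   sizeof(uint64_t);

        printf("%zu\n", n);  /* prints 32 */
        return 0;
    }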
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@@ -248,7 -247,7 +248,7 @@@ ice_get_eeprom(struct net_device *netde int ret = 0; u16 *buf;
- dev = &pf->pdev->dev; + dev = ice_pf_to_dev(pf);
eeprom->magic = hw->vendor_id | (hw->device_id << 16);
@@@ -343,7 -342,6 +343,7 @@@ static u64 ice_eeprom_test(struct net_d static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) { struct ice_pf *pf = (struct ice_pf *)hw->back; + struct device *dev = ice_pf_to_dev(pf); static const u32 patterns[] = { 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF @@@ -359,7 -357,7 +359,7 @@@ val = rd32(hw, reg); if (val == pattern) continue; - dev_err(&pf->pdev->dev, + dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n" , __func__, reg, pattern, val); return 1; @@@ -368,7 -366,7 +368,7 @@@ wr32(hw, reg, orig_val); val = rd32(hw, reg); if (val != orig_val) { - dev_err(&pf->pdev->dev, + dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n" , __func__, reg, orig_val, val); return 1; @@@ -508,7 -506,7 +508,7 @@@ static int ice_lbtest_create_frame(stru if (!pf) return -EINVAL;
- data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL); if (!data) return -ENOMEM;
@@@ -625,7 -623,7 +625,7 @@@ static int ice_lbtest_receive_frames(st continue;
rx_buf = &rx_ring->rx_buf[i]; - received_buf = page_address(rx_buf->page); + received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
if (ice_lbtest_check_frame(received_buf)) valid_frames++; @@@ -650,11 -648,9 +650,11 @@@ static u64 ice_loopback_test(struct net u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; LIST_HEAD(tmp_list); + struct device *dev; u8 *tx_frame; int i;
+ dev = ice_pf_to_dev(pf); netdev_info(netdev, "loopback test\n");
test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info); @@@ -715,12 -711,12 +715,12 @@@ ret = 10;
lbtest_free_frame: - devm_kfree(&pf->pdev->dev, tx_frame); + devm_kfree(dev, tx_frame); remove_mac_filters: if (ice_remove_mac(&pf->hw, &tmp_list)) netdev_err(netdev, "Could not remove MAC filter for the test VSI"); free_mac_list: - ice_free_fltr_list(&pf->pdev->dev, &tmp_list); + ice_free_fltr_list(dev, &tmp_list); lbtest_mac_dis: /* Disable MAC loopback after the test is completed. */ if (ice_aq_set_mac_loopback(&pf->hw, false, NULL)) @@@ -777,9 -773,6 +777,9 @@@ ice_self_test(struct net_device *netdev struct ice_netdev_priv *np = netdev_priv(netdev); bool if_running = netif_running(netdev); struct ice_pf *pf = np->vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf);
if (eth_test->flags == ETH_TEST_FL_OFFLINE) { netdev_info(netdev, "offline testing starting\n"); @@@ -787,7 -780,7 +787,7 @@@ set_bit(__ICE_TESTING, pf->state);
if (ice_active_vfs(pf)) { - dev_warn(&pf->pdev->dev, + dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); data[ICE_ETH_TEST_REG] = 1; data[ICE_ETH_TEST_EEPROM] = 1; @@@ -822,7 -815,8 +822,7 @@@ int status = ice_open(netdev);
if (status) { - dev_err(&pf->pdev->dev, - "Could not open device %s, err %d", + dev_err(dev, "Could not open device %s, err %d", pf->int_name, status); } } @@@ -967,7 -961,7 +967,7 @@@ static int ice_set_fec_cfg(struct net_d }
/* Get last SW configuration */ - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM;
@@@ -1012,7 -1006,7 +1012,7 @@@ }
done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; }
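The devm_kzalloc -> kzalloc conversions in this file share one rationale: these buffers die within the function, so device-managed bookkeeping only adds overhead and a misleading lifetime hint. The shape of the pattern, as a userspace stand-in:

    #include <stdlib.h>

    /* Scratch buffer confined to one call: plain allocate/free keeps
     * its lifetime obvious; no managed-resource machinery needed.
     */
    static int with_scratch(size_t size, int (*use)(void *buf))
    {
        void *buf = calloc(1, size);
        int err;

        if (!buf)
            return -1;
        err = use(buf);
        free(buf);  /* freed on every path before returning */
        return err;
    }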
@@@ -1088,7 -1082,7 +1088,7 @@@ ice_get_fecparam(struct net_device *net break; }
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM;
@@@ -1115,7 -1109,7 +1115,7 @@@ fecparam->fec |= ETHTOOL_FEC_OFF;
done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; }
@@@ -1160,14 -1154,12 +1160,14 @@@ static int ice_set_priv_flags(struct ne DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct device *dev; int ret = 0; u32 i;
if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE)) return -EINVAL;
+ dev = ice_pf_to_dev(pf); set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS); @@@ -1196,7 -1188,7 +1196,7 @@@ * events to respond to. */ if (status) - dev_info(&pf->pdev->dev, + dev_info(dev, "Failed to unreg for LLDP events\n");
/* The AQ call to stop the FW LLDP agent will generate @@@ -1204,14 -1196,20 +1204,14 @@@ */ status = ice_aq_stop_lldp(&pf->hw, true, true, NULL); if (status) - dev_warn(&pf->pdev->dev, - "Fail to stop LLDP agent\n"); + dev_warn(dev, "Fail to stop LLDP agent\n"); /* Use case for having the FW LLDP agent stopped * will likely not need DCB, so failure to init is * not a concern of ethtool */ status = ice_init_pf_dcb(pf, true); if (status) - dev_warn(&pf->pdev->dev, "Fail to init DCB\n"); - - /* Forward LLDP packets to default VSI so that they - * are passed up the stack - */ - ice_cfg_sw_lldp(vsi, false, true); + dev_warn(dev, "Fail to init DCB\n"); } else { enum ice_status status; bool dcbx_agent_status; @@@ -1221,7 -1219,8 +1221,7 @@@ */ status = ice_aq_start_lldp(&pf->hw, true, NULL); if (status) - dev_warn(&pf->pdev->dev, - "Fail to start LLDP Agent\n"); + dev_warn(dev, "Fail to start LLDP Agent\n");
/* AQ command to start FW DCBX agent will fail if * the agent is already started @@@ -1230,9 -1229,10 +1230,9 @@@ &dcbx_agent_status, NULL); if (status) - dev_dbg(&pf->pdev->dev, - "Failed to start FW DCBX\n"); + dev_dbg(dev, "Failed to start FW DCBX\n");
- dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n", + dev_info(dev, "FW DCBX agent is %s\n", dcbx_agent_status ? "ACTIVE" : "DISABLED");
/* Failure to configure MIB change or init DCB is not @@@ -1242,7 -1242,7 +1242,7 @@@ */ status = ice_init_pf_dcb(pf, true); if (status) - dev_dbg(&pf->pdev->dev, "Fail to init DCB\n"); + dev_dbg(dev, "Fail to init DCB\n");
/* Remove rule to direct LLDP packets to default VSI. * The FW LLDP engine will now be consuming them. @@@ -1252,15 -1252,10 +1252,15 @@@ /* Register for MIB change events */ status = ice_cfg_lldp_mib_change(&pf->hw, true); if (status) - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "Fail to enable MIB change events\n"); } } + if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { + /* down and up VSI so that changes of Rx cfg are reflected. */ + ice_down(vsi); + ice_up(vsi); + } clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); return ret; } @@@ -2145,7 -2140,7 +2145,7 @@@ ice_get_link_ksettings(struct net_devic /* flow control is symmetric and always supported */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM;
@@@ -2203,7 -2198,7 +2203,7 @@@ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; }
@@@ -2432,7 -2427,8 +2432,7 @@@ ice_set_link_ksettings(struct net_devic usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX); }
- abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities), - GFP_KERNEL); + abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); if (!abilities) return -ENOMEM;
@@@ -2524,7 -2520,7 +2524,7 @@@ }
done: - devm_kfree(&pf->pdev->dev, abilities); + kfree(abilities); clear_bit(__ICE_CFG_BUSY, pf->state);
return err; @@@ -2581,7 -2577,6 +2581,7 @@@ ice_set_ringparam(struct net_device *ne { struct ice_ring *tx_rings = NULL, *rx_rings = NULL; struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_ring *xdp_rings = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; @@@ -2616,13 -2611,6 +2616,13 @@@ return 0; }
+ /* If there is a AF_XDP UMEM attached to any of Rx rings, + * disallow changing the number of descriptors -- regardless + * if the netdev is running or not. + */ + if (ice_xsk_any_rx_ring_ena(vsi)) + return -EBUSY; + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) @@@ -2636,11 -2624,6 +2636,11 @@@ vsi->tx_rings[i]->count = new_tx_cnt; for (i = 0; i < vsi->alloc_rxq; i++) vsi->rx_rings[i]->count = new_rx_cnt; + if (ice_is_xdp_ena_vsi(vsi)) + for (i = 0; i < vsi->num_xdp_txq; i++) + vsi->xdp_rings[i]->count = new_tx_cnt; + vsi->num_tx_desc = new_tx_cnt; + vsi->num_rx_desc = new_rx_cnt; netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@@ -2652,13 -2635,14 +2652,13 @@@ netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n", vsi->tx_rings[0]->count, new_tx_cnt);
- tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, - sizeof(*tx_rings), GFP_KERNEL); + tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL); if (!tx_rings) { err = -ENOMEM; goto done; }
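The early -EBUSY above protects zero-copy AF_XDP sockets, whose UMEM is wired into the Rx rings; resizing would rebuild those rings underneath the socket. Toy restatement of the guard (struct and flag invented for the example):

    #include <errno.h>
    #include <stdbool.h>

    struct toy_vsi { bool xsk_attached; int rx_count; };

    static int toy_set_ring_count(struct toy_vsi *v, int n)
    {
        if (v->xsk_attached)
            return -EBUSY;  /* the UMEM owns the ring memory */
        v->rx_count = n;
        return 0;
    }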
- for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_txq(vsi, i) { /* clone ring and setup updated count */ tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_cnt; @@@ -2666,42 -2650,15 +2666,42 @@@ tx_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&tx_rings[i]); if (err) { - while (i) { - i--; + while (i--) ice_clean_tx_ring(&tx_rings[i]); - } - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); goto done; } }
+ if (!ice_is_xdp_ena_vsi(vsi)) + goto process_rx; + + /* alloc updated XDP resources */ + netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n", + vsi->xdp_rings[0]->count, new_tx_cnt); + + xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL); + if (!xdp_rings) { + err = -ENOMEM; + goto free_tx; + } + + for (i = 0; i < vsi->num_xdp_txq; i++) { + /* clone ring and setup updated count */ + xdp_rings[i] = *vsi->xdp_rings[i]; + xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].desc = NULL; + xdp_rings[i].tx_buf = NULL; + err = ice_setup_tx_ring(&xdp_rings[i]); + if (err) { + while (i--) + ice_clean_tx_ring(&xdp_rings[i]); + kfree(xdp_rings); + goto free_tx; + } + ice_set_ring_xdp(&xdp_rings[i]); + } + process_rx: if (new_rx_cnt == vsi->rx_rings[0]->count) goto process_link; @@@ -2710,13 -2667,14 +2710,13 @@@ netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n", vsi->rx_rings[0]->count, new_rx_cnt);
- rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, - sizeof(*rx_rings), GFP_KERNEL); + rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL); if (!rx_rings) { err = -ENOMEM; goto done; }
- for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_rxq(vsi, i) { /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; @@@ -2740,7 -2698,7 +2740,7 @@@ rx_unwind i--; ice_free_rx_ring(&rx_rings[i]); } - devm_kfree(&pf->pdev->dev, rx_rings); + kfree(rx_rings); err = -ENOMEM; goto free_tx; } @@@ -2754,15 -2712,15 +2754,15 @@@ process_link ice_down(vsi);
if (tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_txq(vsi, i) { ice_free_tx_ring(vsi->tx_rings[i]); *vsi->tx_rings[i] = tx_rings[i]; } - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); }
if (rx_rings) { - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_rxq(vsi, i) { ice_free_rx_ring(vsi->rx_rings[i]); /* copy the real tail offset */ rx_rings[i].tail = vsi->rx_rings[i]->tail; @@@ -2776,19 -2734,9 +2776,19 @@@ rx_rings[i].next_to_alloc = 0; *vsi->rx_rings[i] = rx_rings[i]; } - devm_kfree(&pf->pdev->dev, rx_rings); + kfree(rx_rings); + } + + if (xdp_rings) { + for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_free_tx_ring(vsi->xdp_rings[i]); + *vsi->xdp_rings[i] = xdp_rings[i]; + } + kfree(xdp_rings); }
+ vsi->num_tx_desc = new_tx_cnt; + vsi->num_rx_desc = new_rx_cnt; ice_up(vsi); } goto done; @@@ -2796,9 -2744,9 +2796,9 @@@ free_tx: /* error cleanup if the Rx allocations failed after getting Tx */ if (tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) + ice_for_each_txq(vsi, i) ice_free_tx_ring(&tx_rings[i]); - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); }
done: @@@ -2846,6 -2794,7 +2846,6 @@@ ice_get_pauseparam(struct net_device *n struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_port_info *pi = np->vsi->port_info; struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_vsi *vsi = np->vsi; struct ice_dcbx_cfg *dcbx_cfg; enum ice_status status;
@@@ -2855,7 -2804,8 +2855,7 @@@
dcbx_cfg = &pi->local_dcbx_cfg;
- pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps), - GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return;
@@@ -2878,7 -2828,7 +2878,7 @@@ pause->rx_pause = 1;
out: - devm_kfree(&vsi->back->pdev->dev, pcaps); + kfree(pcaps); }
/** @@@ -3059,7 -3009,7 +3059,7 @@@ ice_get_rxfh(struct net_device *netdev return -EIO; }
- lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM;
@@@ -3072,7 -3022,7 +3072,7 @@@ indir[i] = (u32)(lut[i]);
out: - devm_kfree(&pf->pdev->dev, lut); + kfree(lut); return ret; }
@@@ -3093,10 -3043,8 +3093,10 @@@ ice_set_rxfh(struct net_device *netdev struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct device *dev; u8 *seed = NULL;
+ dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP;
@@@ -3109,7 -3057,8 +3109,7 @@@ if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = - devm_kzalloc(&pf->pdev->dev, - ICE_VSIQF_HKEY_ARRAY_SIZE, + devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE, GFP_KERNEL); if (!vsi->rss_hkey_user) return -ENOMEM; @@@ -3119,7 -3068,8 +3119,7 @@@ }
if (!vsi->rss_lut_user) { - vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev, - vsi->rss_table_size, + vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size, GFP_KERNEL); if (!vsi->rss_lut_user) return -ENOMEM; @@@ -3142,188 -3092,6 +3142,188 @@@ return 0; }
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+ return min_t(int, num_online_cpus(),
+ pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+ return min_t(int, num_online_cpus(),
+ pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count ones that have both Rx and Tx ring
+ * attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+ u32 combined = 0;
+ int q_idx;
+
+ ice_for_each_q_vector(vsi, q_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring && q_vector->tx.ring)
+ combined++;
+ }
+
+ return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ /* check to see if VSI is active */
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ /* report maximum channels */
+ ch->max_rx = ice_get_max_rxq(pf);
+ ch->max_tx = ice_get_max_txq(pf);
+ ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+ /* report current channels */
+ ch->combined_count = ice_get_combined_cnt(vsi);
+ ch->rx_count = vsi->num_rxq - ch->combined_count;
+ ch->tx_count = vsi->num_txq - ch->combined_count;
+}
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */ +static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + struct ice_hw *hw; + int err = 0; + u8 *lut; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + + if (!req_rss_size) + return -EINVAL; + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* set RSS LUT parameters */ + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + vsi->rss_size = 1; + } else { + struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; + + vsi->rss_size = min_t(int, req_rss_size, + BIT(caps->rss_table_entry_width)); + } + + /* create/set RSS LUT */ + ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); + status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, + vsi->rss_table_size); + if (status) { + dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + err = -EIO; + } + + kfree(lut); + return err; +} + +/** + * ice_set_channels - set the number channels + * @dev: network interface device structure + * @ch: ethtool channel data structure + */ +static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int new_rx = 0, new_tx = 0; + u32 curr_combined; + + /* do not support changing channels in Safe Mode */ + if (ice_is_safe_mode(pf)) { + netdev_err(dev, "Changing channel in Safe Mode is not supported\n"); + return -EOPNOTSUPP; + } + /* do not support changing other_count */ + if (ch->other_count) + return -EINVAL; + + curr_combined = ice_get_combined_cnt(vsi); + + /* these checks are for cases where user didn't specify a particular + * value on cmd line but we get non-zero value anyway via + * get_channels(); look at ethtool.c in ethtool repository (the user + * space part), particularly, do_schannels() routine + */ + if (ch->rx_count == vsi->num_rxq - curr_combined) + ch->rx_count = 0; + if (ch->tx_count == vsi->num_txq - curr_combined) + ch->tx_count = 0; + if (ch->combined_count == curr_combined) + ch->combined_count = 0; + + if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) { + netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n"); + return -EINVAL; + } + + new_rx = ch->combined_count + ch->rx_count; + new_tx = ch->combined_count + ch->tx_count; + + if (new_rx > ice_get_max_rxq(pf)) { + netdev_err(dev, "Maximum allowed Rx channels is %d\n", + ice_get_max_rxq(pf)); + return -EINVAL; + } + if (new_tx > ice_get_max_txq(pf)) { + netdev_err(dev, "Maximum allowed Tx channels is %d\n", + ice_get_max_txq(pf)); + return -EINVAL; + } + + ice_vsi_recfg_qs(vsi, new_rx, new_tx); + + if (new_rx && !netif_is_rxfh_configured(dev)) + return ice_vsi_set_dflt_rss_lut(vsi, new_rx); + + return 0; +} + enum ice_container_type { ICE_RX_CONTAINER, ICE_TX_CONTAINER, @@@ -3363,7 -3131,7 +3363,7 @@@ ice_get_rc_coalesce(struct ethtool_coal ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; break; default: - dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); return -EINVAL; }
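ice_vsi_set_dflt_rss_lut() above delegates the actual spreading to ice_fill_rss_lut(), which assigns queue indices round-robin across the table. A minimal restatement of that fill:

    #include <stdint.h>

    static void fill_rss_lut(uint8_t *lut, int lut_size, int rss_size)
    {
        int i;

        for (i = 0; i < lut_size; i++)
            lut[i] = i % rss_size;  /* round-robin over the queues */
    }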
@@@ -3503,8 -3271,7 +3503,8 @@@ ice_set_rc_coalesce(enum ice_container_
break; default: - dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type); + dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", + c_type); return -EINVAL; }
@@@ -3601,17 -3368,10 +3601,17 @@@ __ice_set_coalesce(struct net_device *n struct ice_vsi *vsi = np->vsi;
if (q_num < 0) { - int i; + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + /* In some cases if DCB is configured the num_[rx|tx]q + * can be less than vsi->num_q_vectors. This check + * accounts for that so we don't report a false failure + */ + if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq) + goto set_complete;
- ice_for_each_q_vector(vsi, i) { - if (ice_set_q_coalesce(vsi, ec, i)) + if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; } goto set_complete; @@@ -3638,151 -3398,6 +3638,151 @@@ ice_set_per_q_coalesce(struct net_devic return __ice_set_coalesce(netdev, ec, q_num); }
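The reworked loop above tolerates vectors beyond both queue counts, which can exist when DCB carves up the VSI. The guard, in isolation:

    #include <stdbool.h>

    /* A vector only needs coalesce settings if it still owns at least
     * one Rx or Tx queue.
     */
    static bool vector_has_rings(int v_idx, int num_rxq, int num_txq)
    {
        return v_idx < num_rxq || v_idx < num_txq;
    }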
+#define ICE_I2C_EEPROM_DEV_ADDR 0xA0 +#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 +#define ICE_MODULE_TYPE_SFP 0x03 +#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D +#define ICE_MODULE_TYPE_QSFP28 0x11 +#define ICE_MODULE_SFF_ADDR_MODE 0x04 +#define ICE_MODULE_SFF_DIAG_CAPAB 0x40 +#define ICE_MODULE_REVISION_ADDR 0x01 +#define ICE_MODULE_SFF_8472_COMP 0x5E +#define ICE_MODULE_SFF_8472_SWAP 0x5C +#define ICE_MODULE_QSFP_MAX_LEN 640 + +/** + * ice_get_module_info - get SFF module type and revision information + * @netdev: network interface device structure + * @modinfo: module EEPROM size and layout information structure + */ +static int +ice_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u8 sff8472_comp = 0; + u8 sff8472_swap = 0; + u8 sff8636_rev = 0; + u8 value = 0; + + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00, + 0, &value, 1, 0, NULL); + if (status) + return -EIO; + + switch (value) { + case ICE_MODULE_TYPE_SFP: + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_SFF_8472_COMP, 0x00, 0, + &sff8472_comp, 1, 0, NULL); + if (status) + return -EIO; + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_SFF_8472_SWAP, 0x00, 0, + &sff8472_swap, 1, 0, NULL); + if (status) + return -EIO; + + if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else if (sff8472_comp && + (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } + break; + case ICE_MODULE_TYPE_QSFP_PLUS: + case ICE_MODULE_TYPE_QSFP28: + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_REVISION_ADDR, 0x00, 0, + &sff8636_rev, 1, 0, NULL); + if (status) + return -EIO; + /* Check revision compliance */ + if (sff8636_rev > 0x02) { + /* Module is SFF-8636 compliant */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN; + } + break; + default: + netdev_warn(netdev, + "SFF Module Type not recognized.\n"); + return -EINVAL; + } + return 0; +} + +/** + * ice_get_module_eeprom - fill buffer with SFF EEPROM contents + * @netdev: network interface device structure + * @ee: EEPROM dump request structure + * @data: buffer to be filled with EEPROM contents + */ +static int +ice_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + u8 addr = ICE_I2C_EEPROM_DEV_ADDR; + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + bool is_sfp = false; + u16 offset = 0; + u8 value = 0; + u8 page = 0; + int i; + + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, + &value, 1, 0, NULL); + if (status) + return -EIO; + + if (!ee || !ee->len || !data) + return -EINVAL; + + if (value == ICE_MODULE_TYPE_SFP) + is_sfp = true; + + for (i = 0; i < ee->len; i++) { + offset = i + ee->offset; + + /* Check if we need to access the other memory page */ + if (is_sfp) { + if (offset >= ETH_MODULE_SFF_8079_LEN) { + offset -= 
ETH_MODULE_SFF_8079_LEN; + addr = ICE_I2C_EEPROM_DEV_ADDR2; + } + } else { + while (offset >= ETH_MODULE_SFF_8436_LEN) { + /* Compute memory page number and offset. */ + offset -= ETH_MODULE_SFF_8436_LEN / 2; + page++; + } + } + + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp, + &value, 1, 0, NULL); + if (status) + value = 0; + data[i] = value; + } + return 0; +} + static const struct ethtool_ops ice_ethtool_ops = { .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, @@@ -3813,15 -3428,11 +3813,15 @@@ .get_rxfh_indir_size = ice_get_rxfh_indir_size, .get_rxfh = ice_get_rxfh, .set_rxfh = ice_set_rxfh, + .get_channels = ice_get_channels, + .set_channels = ice_set_channels, .get_ts_info = ethtool_op_get_ts_info, .get_per_queue_coalesce = ice_get_per_q_coalesce, .set_per_queue_coalesce = ice_set_per_q_coalesce, .get_fecparam = ice_get_fecparam, .set_fecparam = ice_set_fecparam, + .get_module_info = ice_get_module_info, + .get_module_eeprom = ice_get_module_eeprom, };
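In ice_get_module_eeprom() above, QSFP access folds a flat offset into 128-byte upper pages once it passes the 256-byte base window. A worked example of that loop:

    #include <stdio.h>

    #define SFF_8436_LEN 256  /* base EEPROM window size */

    int main(void)
    {
        int offset = 300, page = 0;  /* arbitrary flat offset */

        while (offset >= SFF_8436_LEN) {
            offset -= SFF_8436_LEN / 2;  /* 128-byte upper pages */
            page++;
        }
        printf("page %d, offset %d\n", page, offset);  /* page 1, offset 172 */
        return 0;
    }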
static const struct ethtool_ops ice_ethtool_safe_mode_ops = { @@@ -3840,7 -3451,6 +3840,7 @@@ .get_ringparam = ice_get_ringparam, .set_ringparam = ice_set_ringparam, .nway_reset = ice_nway_reset, + .get_channels = ice_get_channels, };
/** diff --combined drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index ad34f22d44ef,b5867b42ca2f..110a9dfe7192 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@@ -211,7 -211,7 +211,7 @@@ enum ice_flex_rx_mdid /* Rx/Tx Flag64 packet flag bits */ enum ice_flg64_bits { ICE_FLG_PKT_DSI = 0, - ICE_FLG_EVLAN_x8100 = 15, + ICE_FLG_EVLAN_x8100 = 14, ICE_FLG_EVLAN_x9100, ICE_FLG_VLAN_x8100, ICE_FLG_TNL_MAC = 22, @@@ -302,7 -302,7 +302,7 @@@ struct ice_ctx_ele
#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \ .offset = offsetof(struct _struct, _ele), \ - .size_of = FIELD_SIZEOF(struct _struct, _ele), \ + .size_of = sizeof_member(struct _struct, _ele), \ .width = _width, \ .lsb = _lsb, \ } diff --combined drivers/net/ethernet/marvell/mv643xx_eth.c index c43820597218,6d3b7032f842..f578d4132d26 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@@ -1432,11 -1432,11 +1432,11 @@@ struct mv643xx_eth_stats };
#define SSTAT(m) \ - { #m, FIELD_SIZEOF(struct net_device_stats, m), \ + { #m, sizeof_member(struct net_device_stats, m), \ offsetof(struct net_device, stats.m), -1 }
#define MIBSTAT(m) \ - { #m, FIELD_SIZEOF(struct mib_counters, m), \ + { #m, sizeof_member(struct mib_counters, m), \ -1, offsetof(struct mv643xx_eth_private, mib_counters.m) }
static const struct mv643xx_eth_stats mv643xx_eth_stats[] = { @@@ -2590,7 -2590,7 +2590,7 @@@ static void tx_timeout_task(struct work } }
-static void mv643xx_eth_tx_timeout(struct net_device *dev) +static void mv643xx_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mv643xx_eth_private *mp = netdev_priv(dev);
@@@ -2959,16 -2959,15 +2959,16 @@@ static void set_params(struct mv643xx_e static int get_phy_mode(struct mv643xx_eth_private *mp) { struct device *dev = mp->dev->dev.parent; - int iface = -1; + phy_interface_t iface; + int err;
if (dev->of_node) - iface = of_get_phy_mode(dev->of_node); + err = of_get_phy_mode(dev->of_node, &iface);
/* Historical default if unspecified. We could also read/write * the interface state in the PSC1 */ - if (iface < 0) + if (!dev->of_node || err) iface = PHY_INTERFACE_MODE_GMII; return iface; } diff --combined drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index a1202e53710c,f421699d745b..762688149cf1 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@@ -611,7 -611,7 +611,7 @@@ static u32 ptys_get_active_port(struct }
#define MLX4_LINK_MODES_SZ \ - (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8) + (sizeof_member(struct mlx4_ptys_reg, eth_proto_cap) * 8)
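eth_proto_cap in struct mlx4_ptys_reg is a 32-bit field, so the macro above yields 4 * 8 = 32 addressable link-mode bits. A compile-time restatement (struct reduced to the one member for the example):

    #include <stdint.h>

    #define sizeof_member(T, m) (sizeof(((T *)0)->m))

    struct ptys_reg { uint32_t eth_proto_cap; /* other fields omitted */ };

    #define LINK_MODES_SZ (sizeof_member(struct ptys_reg, eth_proto_cap) * 8)

    _Static_assert(LINK_MODES_SZ == 32, "one bit per link mode");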
enum ethtool_report { SUPPORTED = 0, @@@ -1745,7 -1745,6 +1745,7 @@@ static int mlx4_en_get_rxnfc(struct net err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); break; case ETHTOOL_GRXCLSRLALL: + cmd->data = MAX_NUM_OF_FS_RULES; while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { err = mlx4_en_get_flow(dev, cmd, i); if (!err) @@@ -1812,7 -1811,6 +1812,7 @@@ static int mlx4_en_set_channels(struct struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_port_profile new_prof; struct mlx4_en_priv *tmp; + int total_tx_count; int port_up = 0; int xdp_count; int err = 0; @@@ -1827,12 -1825,13 +1827,12 @@@
mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; - if (channel->tx_count * priv->prof->num_up + xdp_count > - priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) { + total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count; + if (total_tx_count > MAX_TX_RINGS) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", - channel->tx_count * priv->prof->num_up + xdp_count, - MAX_TX_RINGS); + total_tx_count, MAX_TX_RINGS); goto out; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d60577484567,82e6972f68b7..78aca2681b71 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@@ -209,7 -209,7 +209,7 @@@ enum fs_i_lock_class };
static const struct rhashtable_params rhash_fte = { - .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_len = sizeof_member(struct fs_fte, val), .key_offset = offsetof(struct fs_fte, val), .head_offset = offsetof(struct fs_fte, hash), .automatic_shrinking = true, @@@ -217,7 -217,7 +217,7 @@@ };
static const struct rhashtable_params rhash_fg = { - .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_len = sizeof_member(struct mlx5_flow_group, mask), .key_offset = offsetof(struct mlx5_flow_group, mask), .head_offset = offsetof(struct mlx5_flow_group, hash), .automatic_shrinking = true, @@@ -531,16 -531,9 +531,16 @@@ static void del_hw_fte(struct fs_node * } }
+static void del_sw_fte_rcu(struct rcu_head *head) +{ + struct fs_fte *fte = container_of(head, struct fs_fte, rcu); + struct mlx5_flow_steering *steering = get_steering(&fte->node); + + kmem_cache_free(steering->ftes_cache, fte); +} + static void del_sw_fte(struct fs_node *node) { - struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; struct fs_fte *fte; int err; @@@ -553,8 -546,7 +553,8 @@@ rhash_fte); WARN_ON(err); ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); - kmem_cache_free(steering->ftes_cache, fte); + + call_rcu(&fte->rcu, del_sw_fte_rcu); }
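del_sw_fte() now defers the slab free through an RCU grace period, which is what makes the lockless FTE lookup added further down safe: a reader that raced with deletion can keep dereferencing the entry until it drops rcu_read_lock(). The pattern, reduced to a hedged sketch (field and helper names are illustrative):

struct obj {
	struct rhash_head hash;	/* rhashtable linkage */
	struct rcu_head rcu;	/* reclaim deferral */
};

static void obj_free_rcu(struct rcu_head *head)
{
	struct obj *o = container_of(head, struct obj, rcu);

	kfree(o);	/* the driver frees into a kmem_cache instead */
}

static void obj_del(struct rhashtable *ht, struct obj *o,
		    const struct rhashtable_params params)
{
	rhashtable_remove_fast(ht, &o->hash, params);
	call_rcu(&o->rcu, obj_free_rcu);	/* freed only after all readers */
}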
static void del_hw_flow_group(struct fs_node *node) @@@ -587,7 -579,7 +587,7 @@@ static void del_sw_flow_group(struct fs
rhashtable_destroy(&fg->ftes_hash); ida_destroy(&fg->fte_allocator); - if (ft->autogroup.active) + if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size) ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, @@@ -1134,8 -1126,6 +1134,8 @@@ mlx5_create_auto_grouped_flow_table(str
ft->autogroup.active = true; ft->autogroup.required_groups = max_num_groups; + /* We reserve space for flow groups in addition to max types */ + ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
return ft; } @@@ -1338,7 -1328,8 +1338,7 @@@ static struct mlx5_flow_group *alloc_au return ERR_PTR(-ENOENT);
if (ft->autogroup.num_groups < ft->autogroup.required_groups) - /* We save place for flow groups in addition to max types */ - group_size = ft->max_fte / (ft->autogroup.required_groups + 1); + group_size = ft->autogroup.group_size;
/* ft->max_fte == ft->autogroup.max_types */ if (group_size == 0) @@@ -1365,8 -1356,7 +1365,8 @@@ if (IS_ERR(fg)) goto out;
- ft->autogroup.num_groups++; + if (group_size == ft->autogroup.group_size) + ft->autogroup.num_groups++;
out: return fg; @@@ -1633,47 -1623,22 +1633,47 @@@ static u64 matched_fgs_get_version(stru }
static struct fs_fte * -lookup_fte_locked(struct mlx5_flow_group *g, - const u32 *match_value, - bool take_write) +lookup_fte_for_write_locked(struct mlx5_flow_group *g, const u32 *match_value) { struct fs_fte *fte_tmp;
- if (take_write) - nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); - else - nested_down_read_ref_node(&g->node, FS_LOCK_PARENT); - fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, - rhash_fte); + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, rhash_fte); if (!fte_tmp || !tree_get_node(&fte_tmp->node)) { fte_tmp = NULL; goto out; } + + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node, false); + fte_tmp = NULL; + goto out; + } + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + +out: + up_write_ref_node(&g->node, false); + return fte_tmp; +} + +static struct fs_fte * +lookup_fte_for_read_locked(struct mlx5_flow_group *g, const u32 *match_value) +{ + struct fs_fte *fte_tmp; + + if (!tree_get_node(&g->node)) + return NULL; + + rcu_read_lock(); + fte_tmp = rhashtable_lookup(&g->ftes_hash, match_value, rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) { + rcu_read_unlock(); + fte_tmp = NULL; + goto out; + } + rcu_read_unlock(); + if (!fte_tmp->node.active) { tree_put_node(&fte_tmp->node, false); fte_tmp = NULL; @@@ -1681,21 -1646,14 +1681,21 @@@ }
nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + out: - if (take_write) - up_write_ref_node(&g->node, false); - else - up_read_ref_node(&g->node); + tree_put_node(&g->node, false); return fte_tmp; }
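The read-side path above pairs rcu_read_lock() with a reference grab before leaving the critical section; when tree_get_node() fails, the entry is mid-teardown and the lookup is treated as a miss. A condensed sketch of that idiom under the same assumptions (refcount-based, illustrative names):

struct obj {
	struct rhash_head hash;
	refcount_t refs;
};

static struct obj *obj_lookup(struct rhashtable *ht, const void *key,
			      const struct rhashtable_params params)
{
	struct obj *o;

	rcu_read_lock();
	o = rhashtable_lookup(ht, key, params);	/* requires RCU read lock */
	if (o && !refcount_inc_not_zero(&o->refs))
		o = NULL;	/* lost the race against deletion */
	rcu_read_unlock();

	return o;	/* caller drops the reference when done */
}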
+static struct fs_fte * +lookup_fte_locked(struct mlx5_flow_group *g, const u32 *match_value, bool write) +{ + if (write) + return lookup_fte_for_write_locked(g, match_value); + else + return lookup_fte_for_read_locked(g, match_value); +} + static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, @@@ -1856,13 -1814,6 +1856,13 @@@ search_again_locked return rule; }
+ fte = alloc_fte(ft, spec, flow_act); + if (IS_ERR(fte)) { + up_write_ref_node(&ft->node, false); + err = PTR_ERR(fte); + goto err_alloc_fte; + } + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); up_write_ref_node(&ft->node, false);
@@@ -1870,9 -1821,17 +1870,9 @@@ if (err) goto err_release_fg;
- fte = alloc_fte(ft, spec, flow_act); - if (IS_ERR(fte)) { - err = PTR_ERR(fte); - goto err_release_fg; - } - err = insert_fte(g, fte); - if (err) { - kmem_cache_free(steering->ftes_cache, fte); + if (err) goto err_release_fg; - }
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); @@@ -1884,8 -1843,6 +1884,8 @@@
err_release_fg: up_write_ref_node(&g->node, false); + kmem_cache_free(steering->ftes_cache, fte); +err_alloc_fte: tree_put_node(&g->node, false); return ERR_PTR(err); } @@@ -2402,17 -2359,9 +2402,17 @@@ static void set_prio_attrs_in_prio(stru int acc_level_ns = acc_level;
prio->start_level = acc_level; - fs_for_each_ns(ns, prio) + fs_for_each_ns(ns, prio) { /* This updates start_level and num_levels of ns's priority descendants */ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + + /* If this a prio with chains, and we can jump from one chain + * (namepsace) to another, so we accumulate the levels + */ + if (prio->node.type == FS_TYPE_PRIO_CHAINS) + acc_level = acc_level_ns; + } + if (!prio->num_levels) prio->num_levels = acc_level_ns - prio->start_level; WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); @@@ -2601,109 -2550,58 +2601,109 @@@ out_err steering->rdma_rx_root_ns = NULL; return err; } -static int init_fdb_root_ns(struct mlx5_flow_steering *steering) + +/* FT and tc chains are stored in the same array so we can re-use the + * mlx5_get_fdb_sub_ns() and tc api for FT chains. + * When creating a new ns for each chain store it in the first available slot. + * Assume tc chains are created and stored first and only then the FT chain. + */ +static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct mlx5_flow_namespace *ns) +{ + int chain = 0; + + while (steering->fdb_sub_ns[chain]) + ++chain; + + steering->fdb_sub_ns[chain] = ns; +} + +static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct fs_prio *maj_prio) { struct mlx5_flow_namespace *ns; - struct fs_prio *maj_prio; struct fs_prio *min_prio; + int prio; + + ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) { + min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO); + if (IS_ERR(min_prio)) + return PTR_ERR(min_prio); + } + + store_fdb_sub_ns_prio_chain(steering, ns); + + return 0; +} + +static int create_fdb_chains(struct mlx5_flow_steering *steering, + int fs_prio, + int chains) +{ + struct fs_prio *maj_prio; int levels; int chain; - int prio; int err;
- steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); - if (!steering->fdb_root_ns) - return -ENOMEM; + levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains; + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, + fs_prio, + levels); + if (IS_ERR(maj_prio)) + return PTR_ERR(maj_prio); + + for (chain = 0; chain < chains; chain++) { + err = create_fdb_sub_ns_prio_chain(steering, maj_prio); + if (err) + return err; + } + + return 0; +} + +static int create_fdb_fast_path(struct mlx5_flow_steering *steering) +{ + int err;
- steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) * - (FDB_MAX_CHAIN + 1), GFP_KERNEL); + steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS, + sizeof(*steering->fdb_sub_ns), + GFP_KERNEL); if (!steering->fdb_sub_ns) return -ENOMEM;
+ err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1); + if (err) + return err; + + return 0; +} + +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *maj_prio; + int err; + + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) + return -ENOMEM; + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 1); if (IS_ERR(maj_prio)) { err = PTR_ERR(maj_prio); goto out_err; } - - levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); - maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, - FDB_FAST_PATH, - levels); - if (IS_ERR(maj_prio)) { - err = PTR_ERR(maj_prio); + err = create_fdb_fast_path(steering); + if (err) goto out_err; - } - - for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { - ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); - if (IS_ERR(ns)) { - err = PTR_ERR(ns); - goto out_err; - } - - for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { - min_prio = fs_create_prio(ns, prio, 2); - if (IS_ERR(min_prio)) { - err = PTR_ERR(min_prio); - goto out_err; - } - } - - steering->fdb_sub_ns[chain] = ns; - }
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1); if (IS_ERR(maj_prio)) { diff --combined drivers/net/ethernet/netronome/nfp/bpf/jit.c index c80bb83c8ac9,276af07a2841..a2c02be6e34c --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@@ -2652,17 -2652,17 +2652,17 @@@ static int mem_ldx_skb(struct nfp_prog
switch (meta->insn.off) { case offsetof(struct __sk_buff, len): - if (size != FIELD_SIZEOF(struct __sk_buff, len)) + if (size != sizeof_member(struct __sk_buff, len)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); break; case offsetof(struct __sk_buff, data): - if (size != FIELD_SIZEOF(struct __sk_buff, data)) + if (size != sizeof_member(struct __sk_buff, data)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; case offsetof(struct __sk_buff, data_end): - if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) + if (size != sizeof_member(struct __sk_buff, data_end)) return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); @@@ -2683,12 -2683,12 +2683,12 @@@ static int mem_ldx_xdp(struct nfp_prog
switch (meta->insn.off) { case offsetof(struct xdp_md, data): - if (size != FIELD_SIZEOF(struct xdp_md, data)) + if (size != sizeof_member(struct xdp_md, data)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; case offsetof(struct xdp_md, data_end): - if (size != FIELD_SIZEOF(struct xdp_md, data_end)) + if (size != sizeof_member(struct xdp_md, data_end)) return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); @@@ -3952,7 -3952,7 +3952,7 @@@ static void nfp_bpf_opt_neg_add_sub(str static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta1, *meta2; - const s32 exp_mask[] = { + static const s32 exp_mask[] = { [BPF_B] = 0x000000ffU, [BPF_H] = 0x0000ffffU, [BPF_W] = 0xffffffffU, diff --combined drivers/net/ethernet/netronome/nfp/bpf/offload.c index 95a0d3910e31,496a08fd6077..1d71fe20c7bd --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@@ -46,7 -46,9 +46,7 @@@ nfp_map_ptr_record(struct nfp_app_bpf * /* Grab a single ref to the map for our record. The prog destroy ndo * happens after free_used_maps(). */ - map = bpf_map_inc(map, false); - if (IS_ERR(map)) - return PTR_ERR(map); + bpf_map_inc(map);
record = kmalloc(sizeof(*record), GFP_KERNEL); if (!record) { @@@ -374,7 -376,7 +374,7 @@@ nfp_bpf_map_alloc(struct nfp_app_bpf *b }
use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) * - FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]); + sizeof_member(struct nfp_bpf_map, use_map[0]);
nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER); if (!nfp_map) @@@ -458,8 -460,8 +458,8 @@@ int nfp_bpf_event_output(struct nfp_app return -EINVAL;
rcu_read_lock(); - record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id, - nfp_bpf_maps_neutral_params); + record = rhashtable_lookup(&bpf->maps_neutral, &map_id, + nfp_bpf_maps_neutral_params); if (!record || map_id_full > U32_MAX) { rcu_read_unlock(); cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n", diff --combined drivers/net/ethernet/ti/netcp_ethss.c index 86a3f42a3dcc,452597f7f5ae..4b54c1a324a7 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@@ -783,28 -783,28 +783,28 @@@ struct netcp_ethtool_stat #define GBE_STATSA_INFO(field) \ { \ "GBE_A:"#field, GBE_STATSA_MODULE, \ - FIELD_SIZEOF(struct gbe_hw_stats, field), \ + sizeof_member(struct gbe_hw_stats, field), \ offsetof(struct gbe_hw_stats, field) \ }
#define GBE_STATSB_INFO(field) \ { \ "GBE_B:"#field, GBE_STATSB_MODULE, \ - FIELD_SIZEOF(struct gbe_hw_stats, field), \ + sizeof_member(struct gbe_hw_stats, field), \ offsetof(struct gbe_hw_stats, field) \ }
#define GBE_STATSC_INFO(field) \ { \ "GBE_C:"#field, GBE_STATSC_MODULE, \ - FIELD_SIZEOF(struct gbe_hw_stats, field), \ + sizeof_member(struct gbe_hw_stats, field), \ offsetof(struct gbe_hw_stats, field) \ }
#define GBE_STATSD_INFO(field) \ { \ "GBE_D:"#field, GBE_STATSD_MODULE, \ - FIELD_SIZEOF(struct gbe_hw_stats, field), \ + sizeof_member(struct gbe_hw_stats, field), \ offsetof(struct gbe_hw_stats, field) \ }
@@@ -957,7 -957,7 +957,7 @@@ static const struct netcp_ethtool_stat #define GBENU_STATS_HOST(field) \ { \ "GBE_HOST:"#field, GBENU_STATS0_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
@@@ -967,56 -967,56 +967,56 @@@ #define GBENU_STATS_P1(field) \ { \ "GBE_P1:"#field, GBENU_STATS1_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P2(field) \ { \ "GBE_P2:"#field, GBENU_STATS2_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P3(field) \ { \ "GBE_P3:"#field, GBENU_STATS3_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P4(field) \ { \ "GBE_P4:"#field, GBENU_STATS4_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P5(field) \ { \ "GBE_P5:"#field, GBENU_STATS5_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P6(field) \ { \ "GBE_P6:"#field, GBENU_STATS6_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P7(field) \ { \ "GBE_P7:"#field, GBENU_STATS7_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
#define GBENU_STATS_P8(field) \ { \ "GBE_P8:"#field, GBENU_STATS8_MODULE, \ - FIELD_SIZEOF(struct gbenu_hw_stats, field), \ + sizeof_member(struct gbenu_hw_stats, field), \ offsetof(struct gbenu_hw_stats, field) \ }
@@@ -1607,21 -1607,21 +1607,21 @@@ static const struct netcp_ethtool_stat #define XGBE_STATS0_INFO(field) \ { \ "GBE_0:"#field, XGBE_STATS0_MODULE, \ - FIELD_SIZEOF(struct xgbe_hw_stats, field), \ + sizeof_member(struct xgbe_hw_stats, field), \ offsetof(struct xgbe_hw_stats, field) \ }
#define XGBE_STATS1_INFO(field) \ { \ "GBE_1:"#field, XGBE_STATS1_MODULE, \ - FIELD_SIZEOF(struct xgbe_hw_stats, field), \ + sizeof_member(struct xgbe_hw_stats, field), \ offsetof(struct xgbe_hw_stats, field) \ }
#define XGBE_STATS2_INFO(field) \ { \ "GBE_2:"#field, XGBE_STATS2_MODULE, \ - FIELD_SIZEOF(struct xgbe_hw_stats, field), \ + sizeof_member(struct xgbe_hw_stats, field), \ offsetof(struct xgbe_hw_stats, field) \ }
@@@ -2291,7 -2291,6 +2291,7 @@@ static int gbe_slave_open(struct gbe_in struct gbe_slave *slave = gbe_intf->slave; phy_interface_t phy_mode; bool has_phy = false; + int err;
void (*hndlr)(struct net_device *) = gbe_adjust_link;
@@@ -2321,11 -2320,11 +2321,11 @@@ slave->phy_port_t = PORT_MII; } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { has_phy = true; - phy_mode = of_get_phy_mode(slave->node); + err = of_get_phy_mode(slave->node, &phy_mode); /* if phy-mode is not present, default to * PHY_INTERFACE_MODE_RGMII */ - if (phy_mode < 0) + if (err) phy_mode = PHY_INTERFACE_MODE_RGMII;
if (!phy_interface_mode_is_rgmii(phy_mode)) { diff --combined drivers/net/geneve.c index 5c6b7fc04ea6,60551a58739e..d17396d74b56 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@@ -853,9 -853,7 +853,9 @@@ static struct dst_entry *geneve_get_v6_ if (dst) return dst; } - if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { + dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6, + NULL); + if (IS_ERR(dst)) { netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); return ERR_PTR(-ENETUNREACH); } @@@ -1156,7 -1154,7 +1156,7 @@@ static void geneve_setup(struct net_dev
static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_ID] = { .type = NLA_U32 }, - [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GENEVE_REMOTE] = { .len = sizeof_member(struct iphdr, daddr) }, [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, diff --combined drivers/net/hyperv/netvsc_drv.c index eff8fef4f775,6102264d9978..f15a712f2332 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@@ -285,9 -285,9 +285,9 @@@ static inline u32 netvsc_get_hash else if (flow.basic.n_proto == htons(ETH_P_IPV6)) hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); else - hash = 0; + return 0;
- skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); + __skb_set_sw_hash(skb, hash, false); }
return hash; @@@ -571,7 -571,7 +571,7 @@@ static int netvsc_start_xmit(struct sk_
/* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_member(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb;
packet->q_idx = skb_get_queue_mapping(skb); @@@ -766,7 -766,6 +766,7 @@@ static struct sk_buff *netvsc_alloc_rec const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan; const struct ndis_tcp_ip_checksum_info *csum_info = nvchan->rsc.csum_info; + const u32 *hash_info = nvchan->rsc.hash_info; struct sk_buff *skb; int i;
@@@ -796,16 -795,14 +796,16 @@@ skb->protocol == htons(ETH_P_IP)) netvsc_comp_ipcsum(skb);
- /* Do L4 checksum offload if enabled and present. - */ + /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || csum_info->receive.udp_checksum_succeeded) skb->ip_summed = CHECKSUM_UNNECESSARY; }
+ if (hash_info && (net->features & NETIF_F_RXHASH)) + skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4); + if (vlan) { u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) | (vlan->cfi ? VLAN_CFI_MASK : 0); @@@ -985,7 -982,7 +985,7 @@@ static int netvsc_attach(struct net_dev if (netif_running(ndev)) { ret = rndis_filter_open(nvdev); if (ret) - return ret; + goto err;
rdev = nvdev->extension; if (!rdev->link_state) @@@ -993,13 -990,6 +993,13 @@@ }
return 0; + +err: + netif_device_detach(ndev); + + rndis_filter_device_remove(hdev, nvdev); + + return ret; }
static int netvsc_set_channels(struct net_device *net, @@@ -1817,10 -1807,8 +1817,10 @@@ static int netvsc_set_features(struct n
ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
- if (ret) + if (ret) { features ^= NETIF_F_LRO; + ndev->features = features; + }
syncvf: if (!vf_netdev) @@@ -2347,6 -2335,8 +2347,6 @@@ static int netvsc_probe(struct hv_devic NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features;
- netdev_lockdep_set_classes(net); - /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) @@@ -2424,61 -2414,6 +2424,61 @@@ static int netvsc_remove(struct hv_devi return 0; }
+static int netvsc_suspend(struct hv_device *dev) +{ + struct net_device_context *ndev_ctx; + struct net_device *vf_netdev, *net; + struct netvsc_device *nvdev; + int ret; + + net = hv_get_drvdata(dev); + + ndev_ctx = netdev_priv(net); + cancel_delayed_work_sync(&ndev_ctx->dwork); + + rtnl_lock(); + + nvdev = rtnl_dereference(ndev_ctx->nvdev); + if (nvdev == NULL) { + ret = -ENODEV; + goto out; + } + + vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + if (vf_netdev) + netvsc_unregister_vf(vf_netdev); + + /* Save the current config info */ + ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev); + + ret = netvsc_detach(net, nvdev); +out: + rtnl_unlock(); + + return ret; +} + +static int netvsc_resume(struct hv_device *dev) +{ + struct net_device *net = hv_get_drvdata(dev); + struct net_device_context *net_device_ctx; + struct netvsc_device_info *device_info; + int ret; + + rtnl_lock(); + + net_device_ctx = netdev_priv(net); + device_info = net_device_ctx->saved_netvsc_dev_info; + + ret = netvsc_attach(net, device_info); + + rtnl_unlock(); + + kfree(device_info); + net_device_ctx->saved_netvsc_dev_info = NULL; + + return ret; +} static const struct hv_vmbus_device_id id_table[] = { /* Network guid */ { HV_NIC_GUID, }, @@@ -2493,8 -2428,6 +2493,8 @@@ static struct hv_driver netvsc_drv = .id_table = id_table, .probe = netvsc_probe, .remove = netvsc_remove, + .suspend = netvsc_suspend, + .resume = netvsc_resume, .driver = { .probe_type = PROBE_FORCE_SYNCHRONOUS, }, diff --combined drivers/net/usb/usbnet.c index bc88923db369,217ea0126f93..c26b72696576 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@@ -1293,7 -1293,7 +1293,7 @@@ static void tx_complete (struct urb *ur
/*-------------------------------------------------------------------------*/
-void usbnet_tx_timeout (struct net_device *net) +void usbnet_tx_timeout (struct net_device *net, unsigned int txqueue) { struct usbnet *dev = netdev_priv(net);
@@@ -1573,13 -1573,6 +1573,13 @@@ static void usbnet_bh (struct timer_lis } }
+static void usbnet_bh_tasklet(unsigned long data) +{ + struct timer_list *t = (struct timer_list *)data; + + usbnet_bh(t); +} +
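The new usbnet_bh_tasklet() wrapper replaces a function-pointer cast: tasklets invoke their callback as void (*)(unsigned long), and calling a handler that takes struct timer_list * through a mismatched pointer type is undefined behaviour and trips indirect-call checking such as Control Flow Integrity. A thin adapter keeps every call type-correct; schematically, with illustrative names:

/* Adapter so one handler serves both a timer and a tasklet.
 * The real code stores &dev->delay in bh.data.
 */
static void my_handler(struct timer_list *t);

static void my_tasklet_fn(unsigned long data)
{
	my_handler((struct timer_list *)data);	/* re-establish the real type */
}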
/*------------------------------------------------------------------------- * @@@ -1707,7 -1700,7 +1707,7 @@@ usbnet_probe (struct usb_interface *ude skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); - dev->bh.func = (void (*)(unsigned long))usbnet_bh; + dev->bh.func = usbnet_bh_tasklet; dev->bh.data = (unsigned long)&dev->delay; INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); @@@ -2184,7 -2177,7 +2184,7 @@@ static int __init usbnet_init(void { /* Compiler should optimize this out. */ BUILD_BUG_ON( - FIELD_SIZEOF(struct sk_buff, cb) < sizeof(struct skb_data)); + sizeof_member(struct sk_buff, cb) < sizeof(struct skb_data));
eth_random_addr(node_id); return 0; diff --combined drivers/net/vxlan.c index 4c34375c2e22,27bfde225a73..bfad6bee99bd --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@@ -793,7 -793,8 +793,7 @@@ static int vxlan_gro_complete(struct so return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); }
-static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, - const u8 *mac, __u16 state, +static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state, __be32 src_vni, __u16 ndm_flags) { struct vxlan_fdb *f; @@@ -834,7 -835,7 +834,7 @@@ static int vxlan_fdb_create(struct vxla return -ENOSPC;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); - f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags); + f = vxlan_fdb_alloc(mac, state, src_vni, ndm_flags); if (!f) return -ENOMEM;
@@@ -2275,6 -2276,7 +2275,6 @@@ static struct dst_entry *vxlan6_get_rou bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct dst_entry *ndst; struct flowi6 fl6; - int err;
if (!sock6) return ERR_PTR(-EIO); @@@ -2297,9 -2299,10 +2297,9 @@@ fl6.fl6_dport = dport; fl6.fl6_sport = sport;
- err = ipv6_stub->ipv6_dst_lookup(vxlan->net, - sock6->sock->sk, - &ndst, &fl6); - if (unlikely(err < 0)) { + ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, + &fl6, NULL); + if (unlikely(IS_ERR(ndst))) { netdev_dbg(dev, "no route to %pI6\n", daddr); return ERR_PTR(-ENETUNREACH); } @@@ -2484,11 -2487,9 +2484,11 @@@ static void vxlan_xmit_one(struct sk_bu vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; - if (info->options_len && - info->key.tun_flags & TUNNEL_VXLAN_OPT) + if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + if (info->options_len < sizeof(*md)) + goto drop; md = ip_tunnel_info_opts(info); + } ttl = info->key.ttl; tos = info->key.tos; label = info->key.label; @@@ -3069,10 -3070,10 +3069,10 @@@ static void vxlan_raw_setup(struct net_
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, - [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_GROUP] = { .len = sizeof_member(struct iphdr, daddr) }, [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, - [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_VXLAN_LOCAL] = { .len = sizeof_member(struct iphdr, saddr) }, [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, [IFLA_VXLAN_TTL] = { .type = NLA_U8 }, @@@ -3173,29 -3174,9 +3173,29 @@@ static void vxlan_get_drvinfo(struct ne strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver)); }
+static int vxlan_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + struct net_device *lowerdev = __dev_get_by_index(vxlan->net, + dst->remote_ifindex); + + if (!lowerdev) { + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.port = PORT_OTHER; + cmd->base.speed = SPEED_UNKNOWN; + + return 0; + } + + return __ethtool_get_link_ksettings(lowerdev, cmd); +} + static const struct ethtool_ops vxlan_ethtool_ops = { - .get_drvinfo = vxlan_get_drvinfo, - .get_link = ethtool_op_get_link, + .get_drvinfo = vxlan_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ksettings = vxlan_get_link_ksettings, };
static struct socket *vxlan_create_sock(struct net *net, bool ipv6, @@@ -3585,13 -3566,10 +3585,13 @@@ static int __vxlan_dev_create(struct ne { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); + struct net_device *remote_dev = NULL; struct vxlan_fdb *f = NULL; bool unregister = false; + struct vxlan_rdst *dst; int err;
+ dst = &vxlan->default_dst; err = vxlan_dev_configure(net, dev, conf, false, extack); if (err) return err; @@@ -3599,14 -3577,14 +3599,14 @@@ dev->ethtool_ops = &vxlan_ethtool_ops;
/* create an fdb entry for a valid default destination */ - if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { + if (!vxlan_addr_any(&dst->remote_ip)) { err = vxlan_fdb_create(vxlan, all_zeros_mac, - &vxlan->default_dst.remote_ip, + &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, vxlan->cfg.dst_port, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_ifindex, + dst->remote_vni, + dst->remote_vni, + dst->remote_ifindex, NTF_SELF, &f); if (err) return err; @@@ -3617,41 -3595,26 +3617,41 @@@ goto errout; unregister = true;
+ if (dst->remote_ifindex) { + remote_dev = __dev_get_by_index(net, dst->remote_ifindex); + if (!remote_dev) + goto errout; + + err = netdev_upper_dev_link(remote_dev, dev, extack); + if (err) + goto errout; + } + err = rtnl_configure_link(dev, NULL); if (err) - goto errout; + goto unlink;
if (f) { - vxlan_fdb_insert(vxlan, all_zeros_mac, - vxlan->default_dst.remote_vni, f); + vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
/* notify default fdb entry */ err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, true, extack); if (err) { vxlan_fdb_destroy(vxlan, f, false, false); + if (remote_dev) + netdev_upper_dev_unlink(remote_dev, dev); goto unregister; } }
list_add(&vxlan->next, &vn->vxlan_list); + if (remote_dev) + dst->remote_dev = remote_dev; return 0; - +unlink: + if (remote_dev) + netdev_upper_dev_unlink(remote_dev, dev); errout: /* unregister_netdevice() destroys the default FDB entry with deletion * notification. But the addition notification was not sent yet, so @@@ -3969,12 -3932,11 +3969,12 @@@ static int vxlan_changelink(struct net_ struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; struct net_device *lowerdev; struct vxlan_config conf; + struct vxlan_rdst *dst; int err;
+ dst = &vxlan->default_dst; err = vxlan_nl2conf(tb, data, dev, &conf, true, extack); if (err) return err; @@@ -3984,14 -3946,6 +3984,14 @@@ if (err) return err;
+ if (dst->remote_dev == lowerdev) + lowerdev = NULL; + + err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev, + extack); + if (err) + return err; + /* handle default dst entry */ if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) { u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni); @@@ -4008,8 -3962,6 +4008,8 @@@ NTF_SELF, true, extack); if (err) { spin_unlock_bh(&vxlan->hash_lock[hash_index]); + netdev_adjacent_change_abort(dst->remote_dev, + lowerdev, dev); return err; } } @@@ -4027,11 -3979,6 +4027,11 @@@ if (conf.age_interval != vxlan->cfg.age_interval) mod_timer(&vxlan->age_timer, jiffies);
+ netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev); + if (lowerdev && lowerdev != dst->remote_dev) { + dst->remote_dev = lowerdev; + netdev_update_lockdep_key(lowerdev); + } vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true); return 0; } @@@ -4044,8 -3991,6 +4044,8 @@@ static void vxlan_dellink(struct net_de
list_del(&vxlan->next); unregister_netdevice_queue(dev, head); + if (vxlan->default_dst.remote_dev) + netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev); }
static size_t vxlan_get_size(const struct net_device *dev) diff --combined drivers/s390/net/qeth_core_main.c index ce7ff1abbef3,5c6642b2415a..90c34d38b44e --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -515,9 -515,7 +515,9 @@@ static int __qeth_issue_next_read(struc
QETH_CARD_TEXT(card, 6, "noirqpnd"); rc = ccw_device_start(channel->ccwdev, ccw, (addr_t) iob, 0, 0); - if (rc) { + if (!rc) { + channel->active_cmd = iob; + } else { QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); atomic_set(&channel->irq_pending, 0); @@@ -903,30 -901,30 +903,30 @@@ static int qeth_get_problem(struct qeth CCW_DEVID(cdev), dstat, cstat); print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET, 16, 1, irb, 64, 1); - return 1; + return -EIO; }
if (dstat & DEV_STAT_UNIT_CHECK) { if (sense[SENSE_RESETTING_EVENT_BYTE] & SENSE_RESETTING_EVENT_FLAG) { QETH_CARD_TEXT(card, 2, "REVIND"); - return 1; + return -EIO; } if (sense[SENSE_COMMAND_REJECT_BYTE] & SENSE_COMMAND_REJECT_FLAG) { QETH_CARD_TEXT(card, 2, "CMDREJi"); - return 1; + return -EIO; } if ((sense[2] == 0xaf) && (sense[3] == 0xfe)) { QETH_CARD_TEXT(card, 2, "AFFE"); - return 1; + return -EIO; } if ((!sense[0]) && (!sense[1]) && (!sense[2]) && (!sense[3])) { QETH_CARD_TEXT(card, 2, "ZEROSEN"); return 0; } QETH_CARD_TEXT(card, 2, "DGENCHK"); - return 1; + return -EIO; } return 0; } @@@ -988,21 -986,8 +988,21 @@@ static void qeth_irq(struct ccw_device QETH_CARD_TEXT(card, 5, "data"); }
- if (qeth_intparm_is_iob(intparm)) - iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); + if (intparm == 0) { + QETH_CARD_TEXT(card, 5, "irqunsol"); + } else if ((addr_t)intparm != (addr_t)channel->active_cmd) { + QETH_CARD_TEXT(card, 5, "irqunexp"); + + dev_err(&cdev->dev, + "Received IRQ with intparm %lx, expected %px\n", + intparm, channel->active_cmd); + if (channel->active_cmd) + qeth_cancel_cmd(channel->active_cmd, -EIO); + } else { + iob = (struct qeth_cmd_buffer *) (addr_t)intparm; + } + + channel->active_cmd = NULL;
rc = qeth_check_irb_error(card, cdev, irb); if (rc) { @@@ -1022,10 -1007,15 +1022,10 @@@ if (irb->scsw.cmd.fctl & (SCSW_FCTL_HALT_FUNC)) channel->state = CH_STATE_HALTED;
- if (intparm == QETH_CLEAR_CHANNEL_PARM) { - QETH_CARD_TEXT(card, 6, "clrchpar"); - /* we don't have to handle this further */ - intparm = 0; - } - if (intparm == QETH_HALT_CHANNEL_PARM) { - QETH_CARD_TEXT(card, 6, "hltchpar"); - /* we don't have to handle this further */ - intparm = 0; + if (iob && (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC | + SCSW_FCTL_HALT_FUNC))) { + qeth_cancel_cmd(iob, -ECANCELED); + iob = NULL; }
cstat = irb->scsw.cmd.cstat; @@@ -1418,7 -1408,7 +1418,7 @@@ static int qeth_clear_channel(struct qe
QETH_CARD_TEXT(card, 3, "clearch"); spin_lock_irq(get_ccwdev_lock(channel->ccwdev)); - rc = ccw_device_clear(channel->ccwdev, QETH_CLEAR_CHANNEL_PARM); + rc = ccw_device_clear(channel->ccwdev, (addr_t)channel->active_cmd); spin_unlock_irq(get_ccwdev_lock(channel->ccwdev));
if (rc) @@@ -1440,7 -1430,7 +1440,7 @@@ static int qeth_halt_channel(struct qet
QETH_CARD_TEXT(card, 3, "haltch"); spin_lock_irq(get_ccwdev_lock(channel->ccwdev)); - rc = ccw_device_halt(channel->ccwdev, QETH_HALT_CHANNEL_PARM); + rc = ccw_device_halt(channel->ccwdev, (addr_t)channel->active_cmd); spin_unlock_irq(get_ccwdev_lock(channel->ccwdev));
if (rc) @@@ -1454,25 -1444,6 +1454,25 @@@ return 0; }
+int qeth_stop_channel(struct qeth_channel *channel) +{ + struct ccw_device *cdev = channel->ccwdev; + int rc; + + rc = ccw_device_set_offline(cdev); + + spin_lock_irq(get_ccwdev_lock(cdev)); + if (channel->active_cmd) { + dev_err(&cdev->dev, "Stopped channel while cmd %px was still active\n", + channel->active_cmd); + channel->active_cmd = NULL; + } + spin_unlock_irq(get_ccwdev_lock(cdev)); + + return rc; +} +EXPORT_SYMBOL_GPL(qeth_stop_channel); + static int qeth_halt_channels(struct qeth_card *card) { int rc1 = 0, rc2 = 0, rc3 = 0; @@@ -1542,6 -1513,7 +1542,6 @@@ int qeth_qdio_clear_card(struct qeth_ca rc = qeth_clear_halt_card(card, use_halt); if (rc) QETH_CARD_TEXT_(card, 3, "2err%d", rc); - card->state = CARD_STATE_DOWN; return rc; } EXPORT_SYMBOL_GPL(qeth_qdio_clear_card); @@@ -1775,8 -1747,6 +1775,8 @@@ static int qeth_send_control_data(struc spin_lock_irq(get_ccwdev_lock(channel->ccwdev)); rc = ccw_device_start_timeout(channel->ccwdev, __ccw_from_cmd(iob), (addr_t) iob, 0, 0, timeout); + if (!rc) + channel->active_cmd = iob; spin_unlock_irq(get_ccwdev_lock(channel->ccwdev)); if (rc) { QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n", @@@ -1987,7 -1957,6 +1987,7 @@@ static void qeth_idx_setup_activate_cmd ccw_device_get_id(CARD_DDEV(card), &dev_id); iob->finalize = qeth_idx_finalize_cmd;
+ port |= QETH_IDX_ACT_INVAL_FRAME; memcpy(QETH_IDX_ACT_PNO(iob->data), &port, 1); memcpy(QETH_IDX_ACT_ISSUER_RM_TOKEN(iob->data), &card->token.issuer_rm_w, QETH_MPC_TOKEN_LENGTH); @@@ -2665,18 -2634,6 +2665,18 @@@ static int qeth_init_input_buffer(struc return 0; }
+static unsigned int qeth_tx_select_bulk_max(struct qeth_card *card, + struct qeth_qdio_out_q *queue) +{ + if (!IS_IQD(card) || + qeth_iqd_is_mcast_queue(card, queue) || + card->options.cq == QETH_CQ_ENABLED || + qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd)) + return 1; + + return card->ssqd.mmwc ? card->ssqd.mmwc : 1; +} + int qeth_init_qdio_queues(struct qeth_card *card) { unsigned int i; @@@ -2716,8 -2673,6 +2716,8 @@@ queue->do_pack = 0; queue->prev_hdr = NULL; queue->bulk_start = 0; + queue->bulk_count = 0; + queue->bulk_max = qeth_tx_select_bulk_max(card, queue); atomic_set(&queue->used_buffers, 0); atomic_set(&queue->set_pci_flags_count, 0); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); @@@ -3125,7 -3080,7 +3125,7 @@@ static int qeth_check_qdio_errors(struc buf->element[14].sflags); QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error); if ((buf->element[15].sflags) == 0x12) { - QETH_CARD_STAT_INC(card, rx_dropped); + QETH_CARD_STAT_INC(card, rx_fifo_errors); return 0; } else return 1; @@@ -3152,7 -3107,7 +3152,7 @@@ static void qeth_queue_input_buffer(str for (i = queue->next_buf_to_init; i < queue->next_buf_to_init + count; ++i) { if (qeth_init_input_buffer(card, - &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q])) { + &queue->bufs[QDIO_BUFNR(i)])) { break; } else { newcount++; @@@ -3194,8 -3149,8 +3194,8 @@@ if (rc) { QETH_CARD_TEXT(card, 2, "qinberr"); } - queue->next_buf_to_init = (queue->next_buf_to_init + count) % - QDIO_MAX_BUFFERS_PER_Q; + queue->next_buf_to_init = QDIO_BUFNR(queue->next_buf_to_init + + count); } }
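The repeated QDIO_BUFNR() conversions in this file swap an explicit modulo for ring-index wrap-around. Since the QDIO ring holds a power-of-two number of buffers (128), the macro can mask instead of divide; a sketch of the presumed equivalence:

#define RING_SIZE	128			/* QDIO_MAX_BUFFERS_PER_Q */
#define RING_BUFNR(n)	((n) & (RING_SIZE - 1))	/* == (n) % RING_SIZE */

Both forms agree for any unsigned n because RING_SIZE is a power of two; the macro form also centralizes the wrapping in one place instead of scattering modulo expressions through the driver.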
@@@ -3243,7 -3198,7 +3243,7 @@@ static int qeth_prep_flush_pack_buffer( /* it's a packing buffer */ atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); queue->next_buf_to_fill = - (queue->next_buf_to_fill + 1) % QDIO_MAX_BUFFERS_PER_Q; + QDIO_BUFNR(queue->next_buf_to_fill + 1); return 1; } return 0; @@@ -3297,8 -3252,7 +3297,8 @@@ static void qeth_flush_buffers(struct q unsigned int qdio_flags;
for (i = index; i < index + count; ++i) { - int bidx = i % QDIO_MAX_BUFFERS_PER_Q; + unsigned int bidx = QDIO_BUFNR(i); + buf = queue->bufs[bidx]; buf->buffer->element[buf->next_element_to_fill - 1].eflags |= SBAL_EFLAGS_LAST_ENTRY; @@@ -3364,11 -3318,10 +3364,11 @@@
static void qeth_flush_queue(struct qeth_qdio_out_q *queue) { - qeth_flush_buffers(queue, queue->bulk_start, 1); + qeth_flush_buffers(queue, queue->bulk_start, queue->bulk_count);
- queue->bulk_start = QDIO_BUFNR(queue->bulk_start + 1); + queue->bulk_start = QDIO_BUFNR(queue->bulk_start + queue->bulk_count); queue->prev_hdr = NULL; + queue->bulk_count = 0; }
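The bulking fields introduced here maintain a simple invariant: bulk_start is the first buffer that has been primed but not yet handed to the hardware, bulk_count is how many primed buffers follow it, and bulk_max (derived from the mmwc limit above) caps how many may accumulate before a flush. A flush submits that window and advances the ring cursor; roughly, under those assumptions:

/* Sketch only; type and submit helper are illustrative. */
struct my_txq {
	unsigned int bulk_start;
	unsigned int bulk_count;
};

static void submit_buffers(struct my_txq *q, unsigned int start,
			   unsigned int count);	/* assumed helper */

static void txq_flush_window(struct my_txq *q)
{
	submit_buffers(q, q->bulk_start, q->bulk_count);
	q->bulk_start = QDIO_BUFNR(q->bulk_start + q->bulk_count);
	q->bulk_count = 0;	/* nothing primed and unflushed remains */
}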
static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) @@@ -3466,7 -3419,8 +3466,7 @@@ static void qeth_qdio_cq_handler(struc }
for (i = first_element; i < first_element + count; ++i) { - int bidx = i % QDIO_MAX_BUFFERS_PER_Q; - struct qdio_buffer *buffer = cq->qdio_bufs[bidx]; + struct qdio_buffer *buffer = cq->qdio_bufs[QDIO_BUFNR(i)]; int e = 0;
while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) && @@@ -3487,8 -3441,8 +3487,8 @@@ "QDIO reported an error, rc=%i\n", rc); QETH_CARD_TEXT(card, 2, "qcqherr"); } - card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init - + count) % QDIO_MAX_BUFFERS_PER_Q; + + cq->next_buf_to_init = QDIO_BUFNR(cq->next_buf_to_init + count); }
static void qeth_qdio_input_handler(struct ccw_device *ccwdev, @@@ -3514,6 -3468,7 +3514,6 @@@ static void qeth_qdio_output_handler(st { struct qeth_card *card = (struct qeth_card *) card_ptr; struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue]; - struct qeth_qdio_out_buffer *buffer; struct net_device *dev = card->dev; struct netdev_queue *txq; int i; @@@ -3527,10 -3482,10 +3527,10 @@@ }
for (i = first_element; i < (first_element + count); ++i) { - int bidx = i % QDIO_MAX_BUFFERS_PER_Q; - buffer = queue->bufs[bidx]; - qeth_handle_send_error(card, buffer, qdio_error); - qeth_clear_output_buffer(queue, buffer, qdio_error, 0); + struct qeth_qdio_out_buffer *buf = queue->bufs[QDIO_BUFNR(i)]; + + qeth_handle_send_error(card, buf, qdio_error); + qeth_clear_output_buffer(queue, buf, qdio_error, 0); }
atomic_sub(count, &queue->used_buffers); @@@ -3725,10 -3680,10 +3725,10 @@@ check_layout }
static bool qeth_iqd_may_bulk(struct qeth_qdio_out_q *queue, - struct qeth_qdio_out_buffer *buffer, struct sk_buff *curr_skb, struct qeth_hdr *curr_hdr) { + struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start]; struct qeth_hdr *prev_hdr = queue->prev_hdr;
if (!prev_hdr) @@@ -3848,14 -3803,13 +3848,14 @@@ static int __qeth_xmit(struct qeth_car struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len) { - struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start]; unsigned int bytes = qdisc_pkt_len(skb); + struct qeth_qdio_out_buffer *buffer; unsigned int next_element; struct netdev_queue *txq; bool stopped = false; bool flush;
+ buffer = queue->bufs[QDIO_BUFNR(queue->bulk_start + queue->bulk_count)]; txq = netdev_get_tx_queue(card->dev, skb_get_queue_mapping(skb));
/* Just a sanity check, the wake/stop logic should ensure that we always @@@ -3864,23 -3818,11 +3864,23 @@@ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) return -EBUSY;
- if ((buffer->next_element_to_fill + elements > queue->max_elements) || - !qeth_iqd_may_bulk(queue, buffer, skb, hdr)) { - atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); - qeth_flush_queue(queue); - buffer = queue->bufs[queue->bulk_start]; + flush = !qeth_iqd_may_bulk(queue, skb, hdr); + + if (flush || + (buffer->next_element_to_fill + elements > queue->max_elements)) { + if (buffer->next_element_to_fill > 0) { + atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); + queue->bulk_count++; + } + + if (queue->bulk_count >= queue->bulk_max) + flush = true; + + if (flush) + qeth_flush_queue(queue); + + buffer = queue->bufs[QDIO_BUFNR(queue->bulk_start + + queue->bulk_count)];
/* Sanity-check again: */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) @@@ -3906,13 -3848,7 +3906,13 @@@
if (flush || next_element >= queue->max_elements) { atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); - qeth_flush_queue(queue); + queue->bulk_count++; + + if (queue->bulk_count >= queue->bulk_max) + flush = true; + + if (flush) + qeth_flush_queue(queue); }
if (stopped && !qeth_out_queue_is_full(queue)) @@@ -3962,7 -3898,8 +3962,7 @@@ int qeth_do_send_packet(struct qeth_car atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); flush_count++; queue->next_buf_to_fill = - (queue->next_buf_to_fill + 1) % - QDIO_MAX_BUFFERS_PER_Q; + QDIO_BUFNR(queue->next_buf_to_fill + 1); buffer = queue->bufs[queue->next_buf_to_fill];
/* We stepped forward, so sanity-check again: */ @@@ -3995,8 -3932,8 +3995,8 @@@ if (!queue->do_pack || stopped || next_element >= queue->max_elements) { flush_count++; atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); - queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % - QDIO_MAX_BUFFERS_PER_Q; + queue->next_buf_to_fill = + QDIO_BUFNR(queue->next_buf_to_fill + 1); }
if (flush_count) @@@ -4324,8 -4261,9 +4324,8 @@@ int qeth_set_access_ctrl_online(struct } return rc; } -EXPORT_SYMBOL_GPL(qeth_set_access_ctrl_online);
-void qeth_tx_timeout(struct net_device *dev) +void qeth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qeth_card *card;
@@@ -4378,9 -4316,7 +4378,9 @@@ static int qeth_mdio_read(struct net_de case MII_NWAYTEST: /* N-way auto-neg test register */ break; case MII_RERRCOUNTER: /* rx error counter */ - rc = card->stats.rx_errors; + rc = card->stats.rx_length_errors + + card->stats.rx_frame_errors + + card->stats.rx_fifo_errors; break; case MII_SREVISION: /* silicon revision */ break; @@@ -4698,12 -4634,12 +4698,12 @@@ EXPORT_SYMBOL_GPL(qeth_vm_request_mac)
static void qeth_determine_capabilities(struct qeth_card *card) { + struct qeth_channel *channel = &card->data; + struct ccw_device *ddev = channel->ccwdev; int rc; - struct ccw_device *ddev; int ddev_offline = 0;
QETH_CARD_TEXT(card, 2, "detcapab"); - ddev = CARD_DDEV(card); if (!ddev->online) { ddev_offline = 1; rc = ccw_device_set_online(ddev); @@@ -4742,7 -4678,7 +4742,7 @@@
out_offline: if (ddev_offline == 1) - ccw_device_set_offline(ddev); + qeth_stop_channel(channel); out: return; } @@@ -4779,7 -4715,7 +4779,7 @@@ static int qeth_qdio_establish(struct q
QETH_CARD_TEXT(card, 2, "qdioest");
- qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL); + qib_param_field = kzalloc(sizeof_member(struct qib, parm), GFP_KERNEL); if (!qib_param_field) { rc = -ENOMEM; goto out_free_nothing; @@@ -4886,6 -4822,7 +4886,6 @@@ static void qeth_core_free_card(struct qeth_clean_channel(&card->data); qeth_put_cmd(card->read_cmd); destroy_workqueue(card->event_wq); - qeth_free_qdio_queues(card); unregister_service_level(&card->qeth_service_level); dev_set_drvdata(&card->gdev->dev, NULL); kfree(card); @@@ -4942,9 -4879,9 +4942,9 @@@ retry QETH_DBF_MESSAGE(2, "Retrying to do IDX activates on device %x.\n", CARD_DEVID(card)); rc = qeth_qdio_clear_card(card, !IS_IQD(card)); - ccw_device_set_offline(CARD_DDEV(card)); - ccw_device_set_offline(CARD_WDEV(card)); - ccw_device_set_offline(CARD_RDEV(card)); + qeth_stop_channel(&card->data); + qeth_stop_channel(&card->write); + qeth_stop_channel(&card->read); qdio_free(CARD_DDEV(card)); rc = ccw_device_set_online(CARD_RDEV(card)); if (rc) @@@ -5040,15 -4977,6 +5040,15 @@@ retriable goto out; } } + + if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) || + (card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM))) + card->info.hwtrap = 0; + + rc = qeth_set_access_ctrl_online(card, 0); + if (rc) + goto out; + return 0; out: dev_warn(&card->gdev->dev, "The qeth device driver failed to recover " @@@ -5059,15 -4987,27 +5059,15 @@@ } EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
-static void qeth_create_skb_frag(struct qdio_buffer_element *element, - struct sk_buff *skb, int offset, int data_len) +static void qeth_create_skb_frag(struct sk_buff *skb, char *data, int data_len) { - struct page *page = virt_to_page(element->addr); + struct page *page = virt_to_page(data); unsigned int next_frag;
- /* first fill the linear space */ - if (!skb->len) { - unsigned int linear = min(data_len, skb_tailroom(skb)); - - skb_put_data(skb, element->addr + offset, linear); - data_len -= linear; - if (!data_len) - return; - offset += linear; - /* fall through to add page frag for remaining data */ - } - next_frag = skb_shinfo(skb)->nr_frags; get_page(page); - skb_add_rx_frag(skb, next_frag, page, offset, data_len, data_len); + skb_add_rx_frag(skb, next_frag, page, offset_in_page(data), data_len, + data_len); }
static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale) @@@ -5082,14 -5022,14 +5082,14 @@@ struct sk_buff *qeth_core_get_next_skb( { struct qdio_buffer_element *element = *__element; struct qdio_buffer *buffer = qethbuffer->buffer; + unsigned int linear_len = 0; int offset = *__offset; + bool use_rx_sg = false; + unsigned int headroom; struct sk_buff *skb; int skb_len = 0; - void *data_ptr; - int data_len; - int headroom = 0; - int use_rx_sg = 0;
+next_packet: /* qeth_hdr must not cross element boundaries */ while (element->length < offset + sizeof(struct qeth_hdr)) { if (qeth_is_last_sbale(element)) @@@ -5100,125 -5040,71 +5100,125 @@@ *hdr = element->addr + offset;
offset += sizeof(struct qeth_hdr); + skb = NULL; + switch ((*hdr)->hdr.l2.id) { case QETH_HEADER_TYPE_LAYER2: skb_len = (*hdr)->hdr.l2.pkt_length; + linear_len = ETH_HLEN; + headroom = 0; break; case QETH_HEADER_TYPE_LAYER3: skb_len = (*hdr)->hdr.l3.length; + if (!IS_LAYER3(card)) { + QETH_CARD_STAT_INC(card, rx_dropped_notsupp); + goto walk_packet; + } + + if ((*hdr)->hdr.l3.flags & QETH_HDR_PASSTHRU) { + linear_len = ETH_HLEN; + headroom = 0; + break; + } + + if ((*hdr)->hdr.l3.flags & QETH_HDR_IPV6) + linear_len = sizeof(struct ipv6hdr); + else + linear_len = sizeof(struct iphdr); headroom = ETH_HLEN; break; case QETH_HEADER_TYPE_OSN: skb_len = (*hdr)->hdr.osn.pdu_length; + if (!IS_OSN(card)) { + QETH_CARD_STAT_INC(card, rx_dropped_notsupp); + goto walk_packet; + } + + linear_len = skb_len; headroom = sizeof(struct qeth_hdr); break; default: - break; - } + if ((*hdr)->hdr.l2.id & QETH_HEADER_MASK_INVAL) + QETH_CARD_STAT_INC(card, rx_frame_errors); + else + QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
- if (!skb_len) + /* Can't determine packet length, drop the whole buffer. */ return NULL; + }
- if (((skb_len >= card->options.rx_sg_cb) && - !IS_OSN(card) && - (!atomic_read(&card->force_alloc_skb))) || - (card->options.cq == QETH_CQ_ENABLED)) - use_rx_sg = 1; + if (skb_len < linear_len) { + QETH_CARD_STAT_INC(card, rx_dropped_runt); + goto walk_packet; + } + + use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) || + ((skb_len >= card->options.rx_sg_cb) && + !atomic_read(&card->force_alloc_skb) && + !IS_OSN(card));
if (use_rx_sg && qethbuffer->rx_skb) { /* QETH_CQ_ENABLED only: */ skb = qethbuffer->rx_skb; qethbuffer->rx_skb = NULL; } else { - unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len; - - skb = napi_alloc_skb(&card->napi, linear + headroom); + if (!use_rx_sg) + linear_len = skb_len; + skb = napi_alloc_skb(&card->napi, linear_len + headroom); } + if (!skb) - goto no_mem; - if (headroom) + QETH_CARD_STAT_INC(card, rx_dropped_nomem); + else if (headroom) skb_reserve(skb, headroom);
- data_ptr = element->addr + offset; +walk_packet: while (skb_len) { - data_len = min(skb_len, (int)(element->length - offset)); - if (data_len) { - if (use_rx_sg) - qeth_create_skb_frag(element, skb, offset, - data_len); - else - skb_put_data(skb, data_ptr, data_len); - } + int data_len = min(skb_len, (int)(element->length - offset)); + char *data = element->addr + offset; + skb_len -= data_len; + offset += data_len; + + /* Extract data from current element: */ + if (skb && data_len) { + if (linear_len) { + unsigned int copy_len; + + copy_len = min_t(unsigned int, linear_len, + data_len); + + skb_put_data(skb, data, copy_len); + linear_len -= copy_len; + data_len -= copy_len; + data += copy_len; + } + + if (data_len) + qeth_create_skb_frag(skb, data, data_len); + } + + /* Step forward to next element: */ if (skb_len) { if (qeth_is_last_sbale(element)) { QETH_CARD_TEXT(card, 4, "unexeob"); QETH_CARD_HEX(card, 2, buffer, sizeof(void *)); - dev_kfree_skb_any(skb); - QETH_CARD_STAT_INC(card, rx_errors); + if (skb) { + dev_kfree_skb_any(skb); + QETH_CARD_STAT_INC(card, + rx_length_errors); + } return NULL; } element++; offset = 0; - data_ptr = element->addr; - } else { - offset += data_len; } } + + /* This packet was skipped, go get another one: */ + if (!skb) + goto next_packet; + *__element = element; *__offset = offset; if (use_rx_sg) { @@@ -5227,6 -5113,12 +5227,6 @@@ skb_shinfo(skb)->nr_frags); } return skb; -no_mem: - if (net_ratelimit()) { - QETH_CARD_TEXT(card, 2, "noskbmem"); - } - QETH_CARD_STAT_INC(card, rx_dropped); - return NULL; } EXPORT_SYMBOL_GPL(qeth_core_get_next_skb);
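The reworked receive path above copies only linear_len bytes (just enough protocol header for the stack to parse) into the skb head and attaches everything else as page fragments, avoiding a full payload copy. Condensed to a sketch under the same assumption that the data sits in pages, as QDIO buffers do (helper name is illustrative):

static void rx_fill(struct sk_buff *skb, char *data, unsigned int len,
		    unsigned int linear_len)
{
	unsigned int copy = min(linear_len, len);

	skb_put_data(skb, data, copy);	/* headers into the linear area */
	data += copy;
	len -= copy;

	if (len) {
		struct page *page = virt_to_page(data);

		get_page(page);		/* the frag holds its own reference */
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
				offset_in_page(data), len, len);
	}
}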
@@@ -5273,7 -5165,8 +5273,7 @@@ int qeth_poll(struct napi_struct *napi card->rx.b_count--; if (card->rx.b_count) { card->rx.b_index = - (card->rx.b_index + 1) % - QDIO_MAX_BUFFERS_PER_Q; + QDIO_BUFNR(card->rx.b_index + 1); card->rx.b_element = &card->qdio.in_q ->bufs[card->rx.b_index] @@@ -5289,9 -5182,9 +5289,9 @@@ } }
- napi_complete_done(napi, work_done); - if (qdio_start_irq(card->data.ccwdev, 0)) - napi_schedule(&card->napi); + if (napi_complete_done(napi, work_done) && + qdio_start_irq(CARD_DDEV(card), 0)) + napi_schedule(napi); out: return work_done; } @@@ -5810,8 -5703,6 +5810,8 @@@ static void qeth_core_remove_device(str qeth_core_free_discipline(card); }
+ qeth_free_qdio_queues(card); + free_netdev(card->dev); qeth_core_free_card(card); put_device(&gdev->dev); @@@ -6307,16 -6198,9 +6307,16 @@@ void qeth_get_stats64(struct net_devic
stats->rx_packets = card->stats.rx_packets; stats->rx_bytes = card->stats.rx_bytes; - stats->rx_errors = card->stats.rx_errors; - stats->rx_dropped = card->stats.rx_dropped; + stats->rx_errors = card->stats.rx_length_errors + + card->stats.rx_frame_errors + + card->stats.rx_fifo_errors; + stats->rx_dropped = card->stats.rx_dropped_nomem + + card->stats.rx_dropped_notsupp + + card->stats.rx_dropped_runt; stats->multicast = card->stats.rx_multicast; + stats->rx_length_errors = card->stats.rx_length_errors; + stats->rx_frame_errors = card->stats.rx_frame_errors; + stats->rx_fifo_errors = card->stats.rx_fifo_errors;
for (i = 0; i < card->qdio.no_out_queues; i++) { queue = card->qdio.out_qs[i]; diff --combined drivers/s390/net/qeth_core_mpc.h index 88f4dc140751,62aebf5cf4ef..4aa564ea7982 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@@ -11,7 -11,6 +11,7 @@@
#include <asm/qeth.h> #include <uapi/linux/if_ether.h> +#include <uapi/linux/in6.h>
#define IPA_PDU_HEADER_SIZE 0x40 #define QETH_IPA_PDU_LEN_TOTAL(buffer) (buffer + 0x0e) @@@ -29,6 -28,20 +29,6 @@@ extern unsigned char IPA_PDU_HEADER[] #define QETH_TIMEOUT (10 * HZ) #define QETH_IPA_TIMEOUT (45 * HZ)
-#define QETH_CLEAR_CHANNEL_PARM -10 -#define QETH_HALT_CHANNEL_PARM -11 - -static inline bool qeth_intparm_is_iob(unsigned long intparm) -{ - switch (intparm) { - case QETH_CLEAR_CHANNEL_PARM: - case QETH_HALT_CHANNEL_PARM: - case 0: - return false; - } - return true; -} - /*****************************************************************************/ /* IP Assist related definitions */ /*****************************************************************************/ @@@ -352,7 -365,8 +352,7 @@@ struct qeth_ipacmd_setdelip6 struct qeth_ipacmd_setdelipm { __u8 mac[6]; __u8 padding[2]; - __u8 ip6[12]; - __u8 ip4[4]; + struct in6_addr ip; } __attribute__ ((packed));
struct qeth_ipacmd_layer2setdelmac { @@@ -421,7 -435,7 +421,7 @@@ struct qeth_ipacmd_setassparms } data; } __attribute__ ((packed));
- #define SETASS_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setassparms,\ + #define SETASS_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_setassparms,\ data.field)
/* SETRTG IPA Command: ****************************************************/ @@@ -535,7 -549,7 +535,7 @@@ struct qeth_ipacmd_setadpparms } data; } __attribute__ ((packed));
- #define SETADP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setadpparms,\ + #define SETADP_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_setadpparms,\ data.field)
/* CREATE_ADDR IPA Command: ***********************************************/ @@@ -648,7 -662,7 +648,7 @@@ struct qeth_ipacmd_vnicc } data; };
- #define VNICC_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_vnicc,\ + #define VNICC_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_vnicc,\ data.field)
/* SETBRIDGEPORT IPA Command: *********************************************/ @@@ -729,7 -743,7 +729,7 @@@ struct qeth_ipacmd_setbridgeport } data; } __packed;
- #define SBP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setbridgeport,\ + #define SBP_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_setbridgeport,\ data.field)
/* ADDRESS_CHANGE_NOTIFICATION adapter-initiated "command" *******************/ @@@ -790,7 -804,7 +790,7 @@@ struct qeth_ipa_cmd } data; } __attribute__ ((packed));
- #define IPA_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipa_cmd, data.field) + #define IPA_DATA_SIZEOF(field) sizeof_member(struct qeth_ipa_cmd, data.field)
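All of the *_DATA_SIZEOF() helpers in this header size the active member of a command's union payload, so a request buffer is allocated and transmitted at exactly the width of the sub-command in use. sizeof_member() makes that possible without an object at hand; with an illustrative struct:

struct cmd {
	u16 seqno;
	union {
		u32 small;
		u8 big[64];
	} data;
};

#define CMD_DATA_SIZEOF(f) sizeof_member(struct cmd, data.f)

/* CMD_DATA_SIZEOF(small) == 4, CMD_DATA_SIZEOF(big) == 64 */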
/* * special command for ARP processing. @@@ -886,7 -900,6 +886,7 @@@ extern unsigned char IDX_ACTIVATE_WRITE #define IDX_ACTIVATE_SIZE 0x22 #define QETH_IDX_ACT_PNO(buffer) (buffer+0x0b) #define QETH_IDX_ACT_ISSUER_RM_TOKEN(buffer) (buffer + 0x0c) +#define QETH_IDX_ACT_INVAL_FRAME 0x40 #define QETH_IDX_NO_PORTNAME_REQUIRED(buffer) ((buffer)[0x0b] & 0x80) #define QETH_IDX_ACT_FUNC_LEVEL(buffer) (buffer + 0x10) #define QETH_IDX_ACT_DATASET_NAME(buffer) (buffer + 0x16) diff --combined drivers/scsi/aacraid/aachba.c index e36608ce937a,f30fe29b5701..3b232b6a3eff --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@@ -535,7 -535,7 +535,7 @@@ static void get_container_name_callback if ((le32_to_cpu(get_name_reply->status) == CT_OK) && (get_name_reply->data[0] != '\0')) { char *sp = get_name_reply->data; - int data_size = FIELD_SIZEOF(struct aac_get_name_resp, data); + int data_size = sizeof_member(struct aac_get_name_resp, data);
sp[data_size - 1] = '\0'; while (*sp == ' ') @@@ -574,7 -574,7 +574,7 @@@ static int aac_get_container_name(struc
dev = (struct aac_dev *)scsicmd->device->host->hostdata;
- data_size = FIELD_SIZEOF(struct aac_get_name_resp, data); + data_size = sizeof_member(struct aac_get_name_resp, data);
cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd);
@@@ -1477,7 -1477,6 +1477,7 @@@ static struct aac_srb * aac_scsi_common struct aac_srb * srbcmd; u32 flag; u32 timeout; + struct aac_dev *dev = fib->dev;
aac_fib_init(fib); switch(cmd->sc_data_direction){ @@@ -1504,7 -1503,7 +1504,7 @@@ srbcmd->flags = cpu_to_le32(flag); timeout = cmd->request->timeout/HZ; if (timeout == 0) - timeout = 1; + timeout = (dev->sa_firmware ? AAC_SA_TIMEOUT : AAC_ARC_TIMEOUT); srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds srbcmd->retry_limit = 0; /* Obsolete parameter */ srbcmd->cdb_size = cpu_to_le32(cmd->cmd_len); @@@ -2468,13 -2467,13 +2468,13 @@@ static int aac_read(struct scsi_cmnd * scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION; set_sense(&dev->fsa_dev[cid].sense_data, - HARDWARE_ERROR, SENCODE_INTERNAL_TARGET_FAILURE, + ILLEGAL_REQUEST, SENCODE_LBA_OUT_OF_RANGE, ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0); memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data, min_t(size_t, sizeof(dev->fsa_dev[cid].sense_data), SCSI_SENSE_BUFFERSIZE)); scsicmd->scsi_done(scsicmd); - return 1; + return 0; }
dprintk((KERN_DEBUG "aac_read[cpu %d]: lba = %llu, t = %ld.\n", @@@ -2560,13 -2559,13 +2560,13 @@@ static int aac_write(struct scsi_cmnd scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION; set_sense(&dev->fsa_dev[cid].sense_data, - HARDWARE_ERROR, SENCODE_INTERNAL_TARGET_FAILURE, + ILLEGAL_REQUEST, SENCODE_LBA_OUT_OF_RANGE, ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0); memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data, min_t(size_t, sizeof(dev->fsa_dev[cid].sense_data), SCSI_SENSE_BUFFERSIZE)); scsicmd->scsi_done(scsicmd); - return 1; + return 0; }
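The two error paths above now complete the command with a CHECK CONDITION carrying ILLEGAL REQUEST / LBA OUT OF RANGE instead of a hardware error, and return 0 once scsi_done() has been called. A userspace sketch of the fixed-format sense data being built (SPC constants written out; the helper name is illustrative):

    #include <stdio.h>
    #include <string.h>

    static void set_fixed_sense(unsigned char *buf, unsigned char key,
                                unsigned char asc, unsigned char ascq)
    {
            memset(buf, 0, 18);
            buf[0]  = 0x70;         /* fixed format, current errors */
            buf[2]  = key;          /* sense key */
            buf[7]  = 10;           /* additional sense length */
            buf[12] = asc;
            buf[13] = ascq;
    }

    int main(void)
    {
            unsigned char sense[18];

            set_fixed_sense(sense, 0x05 /* ILLEGAL REQUEST */,
                            0x21 /* LBA OUT OF RANGE */, 0x00);
            printf("key=%#x asc=%#x ascq=%#x\n",
                   sense[2], sense[12], sense[13]);
            return 0;
    }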
dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n", diff --combined drivers/scsi/cxgbi/libcxgbi.c index 0d044c165960,cf261e1ae9d4..e7143b257117 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@@ -2284,6 -2284,34 +2284,6 @@@ int cxgbi_set_conn_param(struct iscsi_c } EXPORT_SYMBOL_GPL(cxgbi_set_conn_param);
-static inline int csk_print_port(struct cxgbi_sock *csk, char *buf) -{ - int len; - - cxgbi_sock_get(csk); - len = sprintf(buf, "%hu\n", ntohs(csk->daddr.sin_port)); - cxgbi_sock_put(csk); - - return len; -} - -static inline int csk_print_ip(struct cxgbi_sock *csk, char *buf) -{ - int len; - - cxgbi_sock_get(csk); - if (csk->csk_family == AF_INET) - len = sprintf(buf, "%pI4", - &csk->daddr.sin_addr.s_addr); - else - len = sprintf(buf, "%pI6", - &csk->daddr6.sin6_addr); - - cxgbi_sock_put(csk); - - return len; -} - int cxgbi_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param, char *buf) { @@@ -2746,7 -2774,7 +2746,7 @@@ static int __init libcxgbi_init_module( { pr_info("%s", version);
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) < + BUILD_BUG_ON(sizeof_member(struct sk_buff, cb) < sizeof(struct cxgbi_skb_cb)); return 0; } diff --combined drivers/scsi/smartpqi/smartpqi_init.c index 7b7ef3acb504,fafa4c1c2fea..e1499626cbbd --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@@ -33,11 -33,11 +33,11 @@@ #define BUILD_TIMESTAMP #endif
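The check above is the usual compile-time guard that a driver's private control block still fits inside sk_buff's cb[] scratch area. A minimal userspace sketch of the same pattern, with illustrative struct names:

    #define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
    #define sizeof_member(t, f) (sizeof(((t *)0)->f))

    struct skb_like { unsigned char cb[48]; };
    struct private_cb { void *owner; unsigned long flags; unsigned int state; };

    int main(void)
    {
            /* Refuses to compile if private_cb ever outgrows cb[]. */
            BUILD_BUG_ON(sizeof(struct private_cb) >
                         sizeof_member(struct skb_like, cb));
            return 0;
    }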
-#define DRIVER_VERSION "1.2.8-026" +#define DRIVER_VERSION "1.2.10-025" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 2 -#define DRIVER_RELEASE 8 -#define DRIVER_REVISION 26 +#define DRIVER_RELEASE 10 +#define DRIVER_REVISION 25
#define DRIVER_NAME "Microsemi PQI Driver (v" \ DRIVER_VERSION BUILD_TIMESTAMP ")" @@@ -211,11 -211,6 +211,11 @@@ static inline bool pqi_is_external_raid return scsi3addr[2] != 0; }
+static inline bool pqi_ctrl_offline(struct pqi_ctrl_info *ctrl_info) +{ + return !ctrl_info->controller_online; +} + static inline void pqi_check_ctrl_health(struct pqi_ctrl_info *ctrl_info) { if (ctrl_info->controller_online) @@@ -240,21 -235,6 +240,21 @@@ static inline void pqi_save_ctrl_mode(s sis_write_driver_scratch(ctrl_info, mode); }
+static inline void pqi_ctrl_block_device_reset(struct pqi_ctrl_info *ctrl_info) +{ + ctrl_info->block_device_reset = true; +} + +static inline bool pqi_device_reset_blocked(struct pqi_ctrl_info *ctrl_info) +{ + return ctrl_info->block_device_reset; +} + +static inline bool pqi_ctrl_blocked(struct pqi_ctrl_info *ctrl_info) +{ + return ctrl_info->block_requests; +} + static inline void pqi_ctrl_block_requests(struct pqi_ctrl_info *ctrl_info) { ctrl_info->block_requests = true; @@@ -351,16 -331,6 +351,16 @@@ static inline bool pqi_device_in_remove return device->in_remove && !ctrl_info->in_shutdown; }
+static inline void pqi_ctrl_shutdown_start(struct pqi_ctrl_info *ctrl_info) +{ + ctrl_info->in_shutdown = true; +} + +static inline bool pqi_ctrl_in_shutdown(struct pqi_ctrl_info *ctrl_info) +{ + return ctrl_info->in_shutdown; +} + static inline void pqi_schedule_rescan_worker_with_delay( struct pqi_ctrl_info *ctrl_info, unsigned long delay) { @@@ -390,11 -360,6 +390,11 @@@ static inline void pqi_cancel_rescan_wo cancel_delayed_work_sync(&ctrl_info->rescan_work); }
+static inline void pqi_cancel_event_worker(struct pqi_ctrl_info *ctrl_info) +{ + cancel_work_sync(&ctrl_info->event_work); +} + static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info) { if (!ctrl_info->heartbeat_counter) @@@ -412,7 -377,7 +412,7 @@@ static inline u8 pqi_read_soft_reset_st }
static inline void pqi_clear_soft_reset_status(struct pqi_ctrl_info *ctrl_info, - u8 clear) + u8 clear) { u8 status;
@@@ -497,9 -462,9 +497,9 @@@ static int pqi_build_raid_path_request( request->data_direction = SOP_READ_FLAG; cdb[0] = cmd; if (cmd == CISS_REPORT_PHYS) - cdb[1] = CISS_REPORT_PHYS_EXTENDED; + cdb[1] = CISS_REPORT_PHYS_FLAG_OTHER; else - cdb[1] = CISS_REPORT_LOG_EXTENDED; + cdb[1] = CISS_REPORT_LOG_FLAG_UNIQUE_LUN_ID; put_unaligned_be32(cdb_length, &cdb[6]); break; case CISS_GET_RAID_MAP: @@@ -602,12 -567,13 +602,12 @@@ static void pqi_free_io_request(struct }
static int pqi_send_scsi_raid_request(struct pqi_ctrl_info *ctrl_info, u8 cmd, - u8 *scsi3addr, void *buffer, size_t buffer_length, u16 vpd_page, - struct pqi_raid_error_info *error_info, - unsigned long timeout_msecs) + u8 *scsi3addr, void *buffer, size_t buffer_length, u16 vpd_page, + struct pqi_raid_error_info *error_info, unsigned long timeout_msecs) { int rc; - enum dma_data_direction dir; struct pqi_raid_path_request request; + enum dma_data_direction dir;
rc = pqi_build_raid_path_request(ctrl_info, &request, cmd, scsi3addr, buffer, @@@ -615,44 -581,44 +615,44 @@@ if (rc) return rc;
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, - 0, error_info, timeout_msecs); + rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0, + error_info, timeout_msecs);
pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); + return rc; }
-/* Helper functions for pqi_send_scsi_raid_request */ +/* helper functions for pqi_send_scsi_raid_request */
static inline int pqi_send_ctrl_raid_request(struct pqi_ctrl_info *ctrl_info, - u8 cmd, void *buffer, size_t buffer_length) + u8 cmd, void *buffer, size_t buffer_length) { return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID, - buffer, buffer_length, 0, NULL, NO_TIMEOUT); + buffer, buffer_length, 0, NULL, NO_TIMEOUT); }
static inline int pqi_send_ctrl_raid_with_error(struct pqi_ctrl_info *ctrl_info, - u8 cmd, void *buffer, size_t buffer_length, - struct pqi_raid_error_info *error_info) + u8 cmd, void *buffer, size_t buffer_length, + struct pqi_raid_error_info *error_info) { return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID, - buffer, buffer_length, 0, error_info, NO_TIMEOUT); + buffer, buffer_length, 0, error_info, NO_TIMEOUT); }
- static inline int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info, - struct bmic_identify_controller *buffer) + struct bmic_identify_controller *buffer) { return pqi_send_ctrl_raid_request(ctrl_info, BMIC_IDENTIFY_CONTROLLER, - buffer, sizeof(*buffer)); + buffer, sizeof(*buffer)); }
static inline int pqi_sense_subsystem_info(struct pqi_ctrl_info *ctrl_info, - struct bmic_sense_subsystem_info *sense_info) + struct bmic_sense_subsystem_info *sense_info) { return pqi_send_ctrl_raid_request(ctrl_info, - BMIC_SENSE_SUBSYSTEM_INFORMATION, - sense_info, sizeof(*sense_info)); + BMIC_SENSE_SUBSYSTEM_INFORMATION, sense_info, + sizeof(*sense_info)); }
static inline int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info, @@@ -662,9 -628,83 +662,9 @@@ buffer, buffer_length, vpd_page, NULL, NO_TIMEOUT); }
-static bool pqi_vpd_page_supported(struct pqi_ctrl_info *ctrl_info, - u8 *scsi3addr, u16 vpd_page) -{ - int rc; - int i; - int pages; - unsigned char *buf, bufsize; - - buf = kzalloc(256, GFP_KERNEL); - if (!buf) - return false; - - /* Get the size of the page list first */ - rc = pqi_scsi_inquiry(ctrl_info, scsi3addr, - VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES, - buf, SCSI_VPD_HEADER_SZ); - if (rc != 0) - goto exit_unsupported; - - pages = buf[3]; - if ((pages + SCSI_VPD_HEADER_SZ) <= 255) - bufsize = pages + SCSI_VPD_HEADER_SZ; - else - bufsize = 255; - - /* Get the whole VPD page list */ - rc = pqi_scsi_inquiry(ctrl_info, scsi3addr, - VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES, - buf, bufsize); - if (rc != 0) - goto exit_unsupported; - - pages = buf[3]; - for (i = 1; i <= pages; i++) - if (buf[3 + i] == vpd_page) - goto exit_supported; - -exit_unsupported: - kfree(buf); - return false; - -exit_supported: - kfree(buf); - return true; -} - -static int pqi_get_device_id(struct pqi_ctrl_info *ctrl_info, - u8 *scsi3addr, u8 *device_id, int buflen) -{ - int rc; - unsigned char *buf; - - if (!pqi_vpd_page_supported(ctrl_info, scsi3addr, SCSI_VPD_DEVICE_ID)) - return 1; /* function not supported */ - - buf = kzalloc(64, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - rc = pqi_scsi_inquiry(ctrl_info, scsi3addr, - VPD_PAGE | SCSI_VPD_DEVICE_ID, - buf, 64); - if (rc == 0) { - if (buflen > 16) - buflen = 16; - memcpy(device_id, &buf[SCSI_VPD_DEVICE_ID_IDX], buflen); - } - - kfree(buf); - - return rc; -} - static int pqi_identify_physical_device(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, - struct bmic_identify_physical_device *buffer, - size_t buffer_length) + struct bmic_identify_physical_device *buffer, size_t buffer_length) { int rc; enum dma_data_direction dir; @@@ -685,7 -725,6 +685,7 @@@ 0, NULL, NO_TIMEOUT);
pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); + return rc; }
@@@ -724,7 -763,7 +724,7 @@@ int pqi_csmi_smp_passthru(struct pqi_ct buffer, buffer_length, error_info); }
-#define PQI_FETCH_PTRAID_DATA (1UL<<31) +#define PQI_FETCH_PTRAID_DATA (1 << 31)
static int pqi_set_diag_rescan(struct pqi_ctrl_info *ctrl_info) { @@@ -736,15 -775,14 +736,15 @@@ return -ENOMEM;
rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SENSE_DIAG_OPTIONS, - diag, sizeof(*diag)); + diag, sizeof(*diag)); if (rc) goto out;
diag->options |= cpu_to_le32(PQI_FETCH_PTRAID_DATA);
- rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SET_DIAG_OPTIONS, - diag, sizeof(*diag)); + rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SET_DIAG_OPTIONS, diag, + sizeof(*diag)); + out: kfree(diag);
@@@ -755,7 -793,7 +755,7 @@@ static inline int pqi_write_host_wellne void *buffer, size_t buffer_length) { return pqi_send_ctrl_raid_request(ctrl_info, BMIC_WRITE_HOST_WELLNESS, - buffer, buffer_length); + buffer, buffer_length); }
#pragma pack(1) @@@ -908,7 -946,7 +908,7 @@@ static inline int pqi_report_luns(struc void *buffer, size_t buffer_length) { return pqi_send_ctrl_raid_request(ctrl_info, cmd, buffer, - buffer_length); + buffer_length); }
static int pqi_report_phys_logical_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd, @@@ -1242,9 -1280,9 +1242,9 @@@ static void pqi_get_raid_bypass_status( if (rc) goto out;
-#define RAID_BYPASS_STATUS 4 -#define RAID_BYPASS_CONFIGURED 0x1 -#define RAID_BYPASS_ENABLED 0x2 +#define RAID_BYPASS_STATUS 4 +#define RAID_BYPASS_CONFIGURED 0x1 +#define RAID_BYPASS_ENABLED 0x2
bypass_status = buffer[RAID_BYPASS_STATUS]; device->raid_bypass_configured = @@@ -1347,6 -1385,14 +1347,6 @@@ static int pqi_get_device_info(struct p } }
- if (pqi_get_device_id(ctrl_info, device->scsi3addr, - device->unique_id, sizeof(device->unique_id)) < 0) - dev_warn(&ctrl_info->pci_dev->dev, - "Can't get device id for scsi %d:%d:%d:%d\n", - ctrl_info->scsi_host->host_no, - device->bus, device->target, - device->lun); - out: kfree(buffer);
@@@ -1367,7 -1413,6 +1367,7 @@@ static void pqi_get_physical_disk_info( device->queue_depth = PQI_PHYSICAL_DISK_DEFAULT_MAX_QUEUE_DEPTH; return; } + device->box_index = id_phys->box_index; device->phys_box_on_bus = id_phys->phys_box_on_bus; device->phy_connected_dev_type = id_phys->phy_connected_dev_type[0]; @@@ -1783,7 -1828,7 +1783,7 @@@ static void pqi_update_device_list(stru device = new_device_list[i];
find_result = pqi_scsi_find_entry(ctrl_info, device, - &matching_device); + &matching_device);
switch (find_result) { case DEVICE_SAME: @@@ -2012,8 -2057,9 +2012,8 @@@ static int pqi_update_scsi_devices(stru rc = -ENOMEM; goto out; } - if (pqi_hide_vsep) { - int i;
+ if (pqi_hide_vsep) { for (i = num_physicals - 1; i >= 0; i--) { phys_lun_ext_entry = &physdev_list->lun_entries[i]; @@@ -2086,7 -2132,7 +2086,7 @@@ device->is_physical_device = is_physical_device; if (is_physical_device) { if (phys_lun_ext_entry->device_type == - SA_EXPANDER_SMP_DEVICE) + SA_DEVICE_TYPE_EXPANDER_SMP) device->is_expander_smp_device = true; } else { device->is_external_raid_device = @@@ -2123,13 -2169,16 +2123,13 @@@ if (device->is_physical_device) { device->wwid = phys_lun_ext_entry->wwid; if ((phys_lun_ext_entry->device_flags & - REPORT_PHYS_LUN_DEV_FLAG_AIO_ENABLED) && + CISS_REPORT_PHYS_DEV_FLAG_AIO_ENABLED) && phys_lun_ext_entry->aio_handle) { device->aio_enabled = true; - device->aio_handle = - phys_lun_ext_entry->aio_handle; + device->aio_handle = + phys_lun_ext_entry->aio_handle; } - - pqi_get_physical_disk_info(ctrl_info, - device, id_phys); - + pqi_get_physical_disk_info(ctrl_info, device, id_phys); } else { memcpy(device->volume_id, log_lun_ext_entry->volume_id, sizeof(device->volume_id)); @@@ -3109,7 -3158,7 +3109,7 @@@ static enum pqi_soft_reset_status pqi_p }
static void pqi_process_soft_reset(struct pqi_ctrl_info *ctrl_info, - enum pqi_soft_reset_status reset_status) + enum pqi_soft_reset_status reset_status) { int rc;
@@@ -3153,8 -3202,8 +3153,8 @@@ static void pqi_ofa_process_event(struc
if (event_id == PQI_EVENT_OFA_QUIESCE) { dev_info(&ctrl_info->pci_dev->dev, - "Received Online Firmware Activation quiesce event for controller %u\n", - ctrl_info->ctrl_id); + "Received Online Firmware Activation quiesce event for controller %u\n", + ctrl_info->ctrl_id); pqi_ofa_ctrl_quiesce(ctrl_info); pqi_acknowledge_event(ctrl_info, event); if (ctrl_info->soft_reset_handshake_supported) { @@@ -3174,8 -3223,8 +3174,8 @@@ pqi_ofa_free_host_buffer(ctrl_info); pqi_acknowledge_event(ctrl_info, event); dev_info(&ctrl_info->pci_dev->dev, - "Online Firmware Activation(%u) cancel reason : %u\n", - ctrl_info->ctrl_id, event->ofa_cancel_reason); + "Online Firmware Activation(%u) cancel reason : %u\n", + ctrl_info->ctrl_id, event->ofa_cancel_reason); }
mutex_unlock(&ctrl_info->ofa_mutex); @@@ -3354,7 -3403,7 +3354,7 @@@ static unsigned int pqi_process_event_i #define PQI_LEGACY_INTX_MASK 0x1
static inline void pqi_configure_legacy_intx(struct pqi_ctrl_info *ctrl_info, - bool enable_intx) + bool enable_intx) { u32 intx_mask; struct pqi_device_registers __iomem *pqi_registers; @@@ -3792,7 -3841,7 +3792,7 @@@ static int pqi_create_admin_queues(stru &pqi_registers->admin_oq_pi_addr);
reg = PQI_ADMIN_IQ_NUM_ELEMENTS | - (PQI_ADMIN_OQ_NUM_ELEMENTS) << 8 | + (PQI_ADMIN_OQ_NUM_ELEMENTS << 8) | (admin_queues->int_msg_num << 16); writel(reg, &pqi_registers->admin_iq_num_elements); writel(PQI_CREATE_ADMIN_QUEUE_PAIR, @@@ -3999,8 -4048,8 +3999,8 @@@ static void pqi_raid_synchronous_comple complete(waiting); }
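The regrouped parentheses above are purely cosmetic — (X) << 8 and (X << 8) evaluate identically — but the surrounding pattern of packing several queue parameters into one 32-bit register image before a single writel() is worth a sketch (field widths illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_admin_num_elements(uint32_t iq_elements,
                                            uint32_t oq_elements,
                                            uint32_t int_msg_num)
    {
            /* bits 0-7: IQ elements, 8-15: OQ elements,
             * 16+: interrupt message number */
            return iq_elements | (oq_elements << 8) | (int_msg_num << 16);
    }

    int main(void)
    {
            printf("reg = %#010x\n", pack_admin_num_elements(16, 16, 0));
            return 0;
    }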
-static int pqi_process_raid_io_error_synchronous(struct pqi_raid_error_info - *error_info) +static int pqi_process_raid_io_error_synchronous( + struct pqi_raid_error_info *error_info) { int rc = -EIO;
@@@ -4073,8 -4122,6 +4073,8 @@@ static int pqi_submit_raid_request_sync goto out; }
+ atomic_inc(&ctrl_info->sync_cmds_outstanding); + io_request = pqi_alloc_io_request(ctrl_info);
put_unaligned_le16(io_request->index, @@@ -4121,7 -4168,6 +4121,7 @@@
pqi_free_io_request(io_request);
+ atomic_dec(&ctrl_info->sync_cmds_outstanding); out: up(&ctrl_info->sync_request_sem);
@@@ -4619,11 -4665,11 +4619,11 @@@ static void pqi_free_all_io_requests(st
static inline int pqi_alloc_error_buffer(struct pqi_ctrl_info *ctrl_info) { - ctrl_info->error_buffer = dma_alloc_coherent(&ctrl_info->pci_dev->dev, - ctrl_info->error_buffer_length, - &ctrl_info->error_buffer_dma_handle, - GFP_KERNEL);
+ ctrl_info->error_buffer = dma_alloc_coherent(&ctrl_info->pci_dev->dev, + ctrl_info->error_buffer_length, + &ctrl_info->error_buffer_dma_handle, + GFP_KERNEL); if (!ctrl_info->error_buffer) return -ENOMEM;
@@@ -5356,7 -5402,7 +5356,7 @@@ static int pqi_scsi_queue_command(struc
pqi_ctrl_busy(ctrl_info); if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device) || - pqi_ctrl_in_ofa(ctrl_info)) { + pqi_ctrl_in_ofa(ctrl_info) || pqi_ctrl_in_shutdown(ctrl_info)) { rc = SCSI_MLQUEUE_HOST_BUSY; goto out; } @@@ -5373,7 -5419,7 +5373,7 @@@ if (pqi_is_logical_device(device)) { raid_bypassed = false; if (device->raid_bypass_enabled && - !blk_rq_is_passthrough(scmd->request)) { + !blk_rq_is_passthrough(scmd->request)) { rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device, scmd, queue_group); if (rc == 0 || rc == SCSI_MLQUEUE_HOST_BUSY) @@@ -5604,18 -5650,6 +5604,18 @@@ static int pqi_ctrl_wait_for_pending_io return 0; }
+static int pqi_ctrl_wait_for_pending_sync_cmds(struct pqi_ctrl_info *ctrl_info) +{ + while (atomic_read(&ctrl_info->sync_cmds_outstanding)) { + pqi_check_ctrl_health(ctrl_info); + if (pqi_ctrl_offline(ctrl_info)) + return -ENXIO; + usleep_range(1000, 2000); + } + + return 0; +} + static void pqi_lun_reset_complete(struct pqi_io_request *io_request, void *context) { @@@ -5624,8 -5658,7 +5624,8 @@@ complete(waiting); }
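pqi_ctrl_wait_for_pending_sync_cmds() pairs with the atomic_inc()/atomic_dec() of sync_cmds_outstanding added around pqi_submit_raid_request_synchronous() and the device-reset path below: shutdown may only proceed once that counter drains to zero. A standalone sketch of the drain pattern using C11 atomics (names illustrative):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_int sync_cmds_outstanding;

    static void submit_sync_cmd(void)
    {
            atomic_fetch_add(&sync_cmds_outstanding, 1);
    }

    static void complete_sync_cmd(void)
    {
            atomic_fetch_sub(&sync_cmds_outstanding, 1);
    }

    static void wait_for_pending_sync_cmds(void)
    {
            /* The driver sleeps 1-2 ms per poll via usleep_range(). */
            while (atomic_load(&sync_cmds_outstanding))
                    usleep(1000);
    }

    int main(void)
    {
            submit_sync_cmd();
            complete_sync_cmd();
            wait_for_pending_sync_cmds();
            puts("drained");
            return 0;
    }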
-#define PQI_LUN_RESET_TIMEOUT_SECS 10 +#define PQI_LUN_RESET_TIMEOUT_SECS 30 +#define PQI_LUN_RESET_POLL_COMPLETION_SECS 10
static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, struct completion *wait) @@@ -5634,7 -5667,7 +5634,7 @@@
while (1) { if (wait_for_completion_io_timeout(wait, - PQI_LUN_RESET_TIMEOUT_SECS * PQI_HZ)) { + PQI_LUN_RESET_POLL_COMPLETION_SECS * PQI_HZ)) { rc = 0; break; } @@@ -5671,9 -5704,6 +5671,9 @@@ static int pqi_lun_reset(struct pqi_ctr memcpy(request->lun_number, device->scsi3addr, sizeof(request->lun_number)); request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET; + if (ctrl_info->tmf_iu_timeout_supported) + put_unaligned_le16(PQI_LUN_RESET_TIMEOUT_SECS, + &request->timeout);
pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH, @@@ -5703,7 -5733,7 +5703,7 @@@ static int _pqi_device_reset(struct pqi
for (retries = 0;;) { rc = pqi_lun_reset(ctrl_info, device); - if (rc != -EAGAIN || ++retries > PQI_LUN_RESET_RETRIES) + if (rc == 0 || ++retries > PQI_LUN_RESET_RETRIES) break; msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS); } @@@ -5757,17 -5787,17 +5757,17 @@@ static int pqi_eh_device_reset_handler( shost->host_no, device->bus, device->target, device->lun);
pqi_check_ctrl_health(ctrl_info); - if (pqi_ctrl_offline(ctrl_info)) { - dev_err(&ctrl_info->pci_dev->dev, - "controller %u offlined - cannot send device reset\n", - ctrl_info->ctrl_id); + if (pqi_ctrl_offline(ctrl_info) || + pqi_device_reset_blocked(ctrl_info)) { rc = FAILED; goto out; }
pqi_wait_until_ofa_finished(ctrl_info);
+ atomic_inc(&ctrl_info->sync_cmds_outstanding); rc = pqi_device_reset(ctrl_info, device); + atomic_dec(&ctrl_info->sync_cmds_outstanding);
out: dev_err(&ctrl_info->pci_dev->dev, @@@ -6036,9 -6066,6 +6036,9 @@@ static int pqi_passthru_ioctl(struct pq
put_unaligned_le16(iu_length, &request.header.iu_length);
+ if (ctrl_info->raid_iu_timeout_supported) + put_unaligned_le32(iocommand.Request.Timeout, &request.timeout); + rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, PQI_SYNC_FLAGS_INTERRUPTABLE, &pqi_error_info, NO_TIMEOUT);
@@@ -6092,7 -6119,7 +6092,7 @@@ static int pqi_ioctl(struct scsi_devic
ctrl_info = shost_to_hba(sdev->host);
- if (pqi_ctrl_in_ofa(ctrl_info)) + if (pqi_ctrl_in_ofa(ctrl_info) || pqi_ctrl_in_shutdown(ctrl_info)) return -EBUSY;
switch (cmd) { @@@ -6133,8 -6160,14 +6133,8 @@@ static ssize_t pqi_firmware_version_sho static ssize_t pqi_driver_version_show(struct device *dev, struct device_attribute *attr, char *buffer) { - struct Scsi_Host *shost; - struct pqi_ctrl_info *ctrl_info; - - shost = class_to_shost(dev); - ctrl_info = shost_to_hba(shost); - - return snprintf(buffer, PAGE_SIZE, - "%s\n", DRIVER_VERSION BUILD_TIMESTAMP); + return snprintf(buffer, PAGE_SIZE, "%s\n", + DRIVER_VERSION BUILD_TIMESTAMP); }
static ssize_t pqi_serial_number_show(struct device *dev, @@@ -6250,7 -6283,7 +6250,7 @@@ static ssize_t pqi_unique_id_show(struc struct scsi_device *sdev; struct pqi_scsi_dev *device; unsigned long flags; - unsigned char uid[16]; + u8 unique_id[16];
sdev = to_scsi_device(dev); ctrl_info = shost_to_hba(sdev->host); @@@ -6263,22 -6296,16 +6263,22 @@@ flags); return -ENODEV; } - memcpy(uid, device->unique_id, sizeof(uid)); + + if (device->is_physical_device) { + memset(unique_id, 0, 8); + memcpy(unique_id + 8, &device->wwid, sizeof(device->wwid)); + } else { + memcpy(unique_id, device->volume_id, sizeof(device->volume_id)); + }
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
return snprintf(buffer, PAGE_SIZE, "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X\n", - uid[0], uid[1], uid[2], uid[3], - uid[4], uid[5], uid[6], uid[7], - uid[8], uid[9], uid[10], uid[11], - uid[12], uid[13], uid[14], uid[15]); + unique_id[0], unique_id[1], unique_id[2], unique_id[3], + unique_id[4], unique_id[5], unique_id[6], unique_id[7], + unique_id[8], unique_id[9], unique_id[10], unique_id[11], + unique_id[12], unique_id[13], unique_id[14], unique_id[15]); }
static ssize_t pqi_lunid_show(struct device *dev, @@@ -6301,7 -6328,6 +6301,7 @@@ flags); return -ENODEV; } + memcpy(lunid, device->scsi3addr, sizeof(lunid));
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); @@@ -6309,8 -6335,7 +6309,8 @@@ return snprintf(buffer, PAGE_SIZE, "0x%8phN\n", lunid); }
-#define MAX_PATHS 8 +#define MAX_PATHS 8 + static ssize_t pqi_path_info_show(struct device *dev, struct device_attribute *attr, char *buf) { @@@ -6322,9 -6347,9 +6322,9 @@@ int output_len = 0; u8 box; u8 bay; - u8 path_map_index = 0; + u8 path_map_index; char *active; - unsigned char phys_connector[2]; + u8 phys_connector[2];
sdev = to_scsi_device(dev); ctrl_info = shost_to_hba(sdev->host); @@@ -6340,7 -6365,7 +6340,7 @@@
bay = device->bay; for (i = 0; i < MAX_PATHS; i++) { - path_map_index = 1<<i; + path_map_index = 1 << i; if (i == device->active_path_index) active = "Active"; else if (device->path_map & path_map_index) @@@ -6391,10 -6416,10 +6391,10 @@@ end_buffer }
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + return output_len; }
- static ssize_t pqi_sas_address_show(struct device *dev, struct device_attribute *attr, char *buffer) { @@@ -6415,7 -6440,6 +6415,7 @@@ flags); return -ENODEV; } + sas_address = device->sas_address;
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); @@@ -6820,27 -6844,6 +6820,27 @@@ static void pqi_firmware_feature_status firmware_feature->feature_name); }
+static void pqi_ctrl_update_feature_flags(struct pqi_ctrl_info *ctrl_info, + struct pqi_firmware_feature *firmware_feature) +{ + switch (firmware_feature->feature_bit) { + case PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE: + ctrl_info->soft_reset_handshake_supported = + firmware_feature->enabled; + break; + case PQI_FIRMWARE_FEATURE_RAID_IU_TIMEOUT: + ctrl_info->raid_iu_timeout_supported = + firmware_feature->enabled; + break; + case PQI_FIRMWARE_FEATURE_TMF_IU_TIMEOUT: + ctrl_info->tmf_iu_timeout_supported = + firmware_feature->enabled; + break; + } + + pqi_firmware_feature_status(ctrl_info, firmware_feature); +} + static inline void pqi_firmware_feature_update(struct pqi_ctrl_info *ctrl_info, struct pqi_firmware_feature *firmware_feature) { @@@ -6864,17 -6867,7 +6864,17 @@@ static struct pqi_firmware_feature pqi_ { .feature_name = "New Soft Reset Handshake", .feature_bit = PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE, - .feature_status = pqi_firmware_feature_status, + .feature_status = pqi_ctrl_update_feature_flags, + }, + { + .feature_name = "RAID IU Timeout", + .feature_bit = PQI_FIRMWARE_FEATURE_RAID_IU_TIMEOUT, + .feature_status = pqi_ctrl_update_feature_flags, + }, + { + .feature_name = "TMF IU Timeout", + .feature_bit = PQI_FIRMWARE_FEATURE_TMF_IU_TIMEOUT, + .feature_status = pqi_ctrl_update_feature_flags, }, };
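With pqi_ctrl_update_feature_flags() as the shared status callback, adding a firmware feature now takes one table entry plus one switch case, replacing the open-coded soft-reset check removed a few hunks below. A cut-down sketch of the table-plus-callback pattern, with illustrative names:

    #include <stdbool.h>
    #include <stdio.h>

    enum { FEATURE_SOFT_RESET, FEATURE_RAID_TIMEOUT };

    struct firmware_feature {
            const char *name;
            int bit;
            bool enabled;
            void (*status)(struct firmware_feature *f);
    };

    static bool soft_reset_supported;

    static void update_feature_flags(struct firmware_feature *f)
    {
            switch (f->bit) {
            case FEATURE_SOFT_RESET:
                    soft_reset_supported = f->enabled;
                    break;
            }
            printf("%s %s\n", f->name, f->enabled ? "enabled" : "disabled");
    }

    static struct firmware_feature features[] = {
            { "Soft Reset Handshake", FEATURE_SOFT_RESET, false,
              update_feature_flags },
            { "RAID IU Timeout", FEATURE_RAID_TIMEOUT, false,
              update_feature_flags },
    };

    int main(void)
    {
            for (unsigned int i = 0;
                 i < sizeof(features) / sizeof(features[0]); i++) {
                    features[i].enabled = true; /* as if read from firmware */
                    features[i].status(&features[i]);
            }
            return 0;
    }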
@@@ -6928,6 -6921,7 +6928,6 @@@ static void pqi_process_firmware_featur return; }
- ctrl_info->soft_reset_handshake_supported = false; for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) { if (!pqi_firmware_features[i].supported) continue; @@@ -6935,6 -6929,10 +6935,6 @@@ firmware_features_iomem_addr, pqi_firmware_features[i].feature_bit)) { pqi_firmware_features[i].enabled = true; - if (pqi_firmware_features[i].feature_bit == - PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE) - ctrl_info->soft_reset_handshake_supported = - true; } pqi_firmware_feature_update(ctrl_info, &pqi_firmware_features[i]); @@@ -7076,20 -7074,13 +7076,20 @@@ static int pqi_force_sis_mode(struct pq return pqi_revert_to_sis_mode(ctrl_info); }
+#define PQI_POST_RESET_DELAY_B4_MSGU_READY 5000 + static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) { int rc;
- rc = pqi_force_sis_mode(ctrl_info); - if (rc) - return rc; + if (reset_devices) { + sis_soft_reset(ctrl_info); + msleep(PQI_POST_RESET_DELAY_B4_MSGU_READY); + } else { + rc = pqi_force_sis_mode(ctrl_info); + if (rc) + return rc; + }
/* * Wait until the controller is ready to start accepting SIS @@@ -7395,7 -7386,7 +7395,7 @@@ static int pqi_ctrl_init_resume(struct rc = pqi_get_ctrl_product_details(ctrl_info); if (rc) { dev_err(&ctrl_info->pci_dev->dev, - "error obtaining product detail\n"); + "error obtaining product details\n"); return rc; }
@@@ -7523,7 -7514,6 +7523,7 @@@ static struct pqi_ctrl_info *pqi_alloc_
INIT_WORK(&ctrl_info->event_work, pqi_event_worker); atomic_set(&ctrl_info->num_interrupts, 0); + atomic_set(&ctrl_info->sync_cmds_outstanding, 0);
INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker); INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker); @@@ -7731,8 -7721,6 +7731,8 @@@ static void pqi_ofa_setup_host_buffer(s dev_err(dev, "Failed to allocate host buffer of size = %u", bytes_requested); } + + return; }
static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info) @@@ -7799,6 -7787,8 +7799,6 @@@ static int pqi_ofa_host_memory_update(s 0, NULL, NO_TIMEOUT); }
-#define PQI_POST_RESET_DELAY_B4_MSGU_READY 5000 - static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info) { msleep(PQI_POST_RESET_DELAY_B4_MSGU_READY); @@@ -7966,73 -7956,28 +7966,73 @@@ static void pqi_pci_remove(struct pci_d pqi_remove_ctrl(ctrl_info); }
+static void pqi_crash_if_pending_command(struct pqi_ctrl_info *ctrl_info) +{ + unsigned int i; + struct pqi_io_request *io_request; + struct scsi_cmnd *scmd; + + for (i = 0; i < ctrl_info->max_io_slots; i++) { + io_request = &ctrl_info->io_request_pool[i]; + if (atomic_read(&io_request->refcount) == 0) + continue; + scmd = io_request->scmd; + WARN_ON(scmd != NULL); /* IO command from SML */ + WARN_ON(scmd == NULL); /* Non-IO cmd or driver initiated */ + } +} + static void pqi_shutdown(struct pci_dev *pci_dev) { int rc; struct pqi_ctrl_info *ctrl_info;
ctrl_info = pci_get_drvdata(pci_dev); - if (!ctrl_info) - goto error; + if (!ctrl_info) { + dev_err(&pci_dev->dev, + "cache could not be flushed\n"); + return; + } + + pqi_disable_events(ctrl_info); + pqi_wait_until_ofa_finished(ctrl_info); + pqi_cancel_update_time_worker(ctrl_info); + pqi_cancel_rescan_worker(ctrl_info); + pqi_cancel_event_worker(ctrl_info); + + pqi_ctrl_shutdown_start(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); + + rc = pqi_ctrl_wait_for_pending_io(ctrl_info, NO_TIMEOUT); + if (rc) { + dev_err(&pci_dev->dev, + "wait for pending I/O failed\n"); + return; + } + + pqi_ctrl_block_device_reset(ctrl_info); + pqi_wait_until_lun_reset_finished(ctrl_info);
/* * Write all data in the controller's battery-backed cache to * storage. */ rc = pqi_flush_cache(ctrl_info, SHUTDOWN); - pqi_free_interrupts(ctrl_info); - pqi_reset(ctrl_info); - if (rc == 0) + if (rc) + dev_err(&pci_dev->dev, + "unable to flush controller cache\n"); + + pqi_ctrl_block_requests(ctrl_info); + + rc = pqi_ctrl_wait_for_pending_sync_cmds(ctrl_info); + if (rc) { + dev_err(&pci_dev->dev, + "wait for pending sync cmds failed\n"); return; + }
-error: - dev_warn(&pci_dev->dev, - "unable to flush controller cache\n"); + pqi_crash_if_pending_command(ctrl_info); + pqi_reset(ctrl_info); }
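The reworked pqi_shutdown() is mostly an ordering exercise: stop producers of new work, drain what is already in flight, then flush the battery-backed cache and reset. A stub sketch of that ordering (steps only, not the driver API):

    #include <stdio.h>

    static void disable_events(void)    { puts("1. disable async events"); }
    static void cancel_workers(void)    { puts("2. cancel rescan/event workers"); }
    static void wait_pending_io(void)   { puts("3. wait for in-flight I/O"); }
    static void block_resets(void)      { puts("4. block new device resets"); }
    static void flush_cache(void)       { puts("5. flush write cache"); }
    static void block_requests(void)    { puts("6. block new requests"); }
    static void wait_pending_sync(void) { puts("7. drain sync commands"); }
    static void controller_reset(void)  { puts("8. reset controller"); }

    int main(void)
    {
            disable_events();
            cancel_workers();
            wait_pending_io();
            block_resets();
            flush_cache();
            block_requests();
            wait_pending_sync();
            controller_reset();
            return 0;
    }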
static void pqi_process_lockup_action_param(void) @@@ -8689,11 -8634,11 +8689,11 @@@ static void __attribute__((unused)) ver BUILD_BUG_ON(offsetof(struct pqi_general_admin_request, data.delete_operational_queue.queue_id) != 12); BUILD_BUG_ON(sizeof(struct pqi_general_admin_request) != 64); - BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request, + BUILD_BUG_ON(sizeof_member(struct pqi_general_admin_request, data.create_operational_iq) != 64 - 11); - BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request, + BUILD_BUG_ON(sizeof_member(struct pqi_general_admin_request, data.create_operational_oq) != 64 - 11); - BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request, + BUILD_BUG_ON(sizeof_member(struct pqi_general_admin_request, data.delete_operational_queue) != 64 - 11);
BUILD_BUG_ON(offsetof(struct pqi_general_admin_response, @@@ -8740,8 -8685,6 +8740,8 @@@ error_index) != 27); BUILD_BUG_ON(offsetof(struct pqi_raid_path_request, cdb) != 32); + BUILD_BUG_ON(offsetof(struct pqi_raid_path_request, + timeout) != 60); BUILD_BUG_ON(offsetof(struct pqi_raid_path_request, sg_descriptors) != 64); BUILD_BUG_ON(sizeof(struct pqi_raid_path_request) != @@@ -8897,8 -8840,6 +8897,8 @@@ BUILD_BUG_ON(offsetof(struct pqi_task_management_request, nexus_id) != 10); BUILD_BUG_ON(offsetof(struct pqi_task_management_request, + timeout) != 14); + BUILD_BUG_ON(offsetof(struct pqi_task_management_request, lun_number) != 16); BUILD_BUG_ON(offsetof(struct pqi_task_management_request, protocol_specific) != 24); diff --combined drivers/usb/gadget/function/f_fs.c index ce1d0235969c,2539ea40e94c..b0f3aa67a040 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@@ -1352,6 -1352,14 +1352,6 @@@ static long ffs_epfile_ioctl(struct fil return ret; }
-#ifdef CONFIG_COMPAT -static long ffs_epfile_compat_ioctl(struct file *file, unsigned code, - unsigned long value) -{ - return ffs_epfile_ioctl(file, code, value); -} -#endif - static const struct file_operations ffs_epfile_operations = { .llseek = no_llseek,
@@@ -1360,7 -1368,9 +1360,7 @@@ .read_iter = ffs_epfile_read_iter, .release = ffs_epfile_release, .unlocked_ioctl = ffs_epfile_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ffs_epfile_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, };
@@@ -3509,7 -3519,7 +3509,7 @@@ static void ffs_free_inst(struct usb_fu
static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name) { - if (strlen(name) >= FIELD_SIZEOF(struct ffs_dev, name)) + if (strlen(name) >= sizeof_member(struct ffs_dev, name)) return -ENAMETOOLONG; return ffs_name_dev(to_f_fs_opts(fi)->dev, name); } diff --combined fs/crypto/keyring.c index 040df1f5e1c8,58e8b4b167d6..7853f202a17d --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@@ -43,10 -43,8 +43,10 @@@ static void free_master_key(struct fscr
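The same sizeof_member() conversion, here as a bounds check: the name is rejected when it would not fit, terminating NUL included, in the fixed-size field. A sketch (the field width is illustrative, not the real struct ffs_dev layout):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    #define sizeof_member(t, f) (sizeof(((t *)0)->f))

    struct ffs_dev_like {
            char name[41];          /* illustrative width */
    };

    static int set_inst_name(const char *name)
    {
            /* >= keeps one byte free for the terminating NUL */
            if (strlen(name) >= sizeof_member(struct ffs_dev_like, name))
                    return -ENAMETOOLONG;
            return 0;
    }

    int main(void)
    {
            printf("%d\n", set_inst_name("adb"));
            return 0;
    }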
wipe_master_key_secret(&mk->mk_secret);
- for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++) - crypto_free_skcipher(mk->mk_mode_keys[i]); + for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { + crypto_free_skcipher(mk->mk_direct_tfms[i]); + crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]); + }
key_put(mk->mk_users); kzfree(mk); @@@ -151,7 -149,7 +151,7 @@@ static struct key *search_fscrypt_keyri }
#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \ - (CONST_STRLEN("fscrypt-") + FIELD_SIZEOF(struct super_block, s_id)) + (CONST_STRLEN("fscrypt-") + sizeof_member(struct super_block, s_id))
#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
diff --combined fs/ext2/super.c index 8643f98e9578,2435eda5e2d6..cd53b01d2bfd --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@@ -219,7 -219,7 +219,7 @@@ static int __init init_inodecache(void (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| SLAB_ACCOUNT), offsetof(struct ext2_inode_info, i_data), - sizeof_field(struct ext2_inode_info, i_data), + sizeof_member(struct ext2_inode_info, i_data), init_once); if (ext2_inode_cachep == NULL) return -ENOMEM; @@@ -702,7 -702,13 +702,7 @@@ static int ext2_check_descriptors(struc for (i = 0; i < sbi->s_groups_count; i++) { struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL); ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i); - ext2_fsblk_t last_block; - - if (i == sbi->s_groups_count - 1) - last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; - else - last_block = first_block + - (EXT2_BLOCKS_PER_GROUP(sb) - 1); + ext2_fsblk_t last_block = ext2_group_last_block_no(sb, i);
if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) @@@ -800,6 -806,7 +800,6 @@@ static unsigned long descriptor_loc(str { struct ext2_sb_info *sbi = EXT2_SB(sb); unsigned long bg, first_meta_bg; - int has_super = 0; first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
@@@ -807,8 -814,10 +807,8 @@@ nr < first_meta_bg) return (logic_sb_block + nr + 1); bg = sbi->s_desc_per_block * nr; - if (ext2_bg_has_super(sb, bg)) - has_super = 1;
- return ext2_group_first_block_no(sb, bg) + has_super; + return ext2_group_first_block_no(sb, bg) + ext2_bg_has_super(sb, bg); }
static int ext2_fill_super(struct super_block *sb, void *data, int silent) @@@ -1138,7 -1147,6 +1138,7 @@@ ext2_count_dirs(sb), GFP_KERNEL); } if (err) { + ret = err; ext2_msg(sb, KERN_ERR, "error: insufficient memory"); goto failed_mount3; } diff --combined fs/ext4/super.c index 1d82b56d9b11,2318e5fe3fd4..1ef37008ea9c --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@@ -1151,7 -1151,7 +1151,7 @@@ static int __init init_inodecache(void (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| SLAB_ACCOUNT), offsetof(struct ext4_inode_info, i_data), - sizeof_field(struct ext4_inode_info, i_data), + sizeof_member(struct ext4_inode_info, i_data), init_once); if (ext4_inode_cachep == NULL) return -ENOMEM; @@@ -1172,9 -1172,9 +1172,9 @@@ void ext4_clear_inode(struct inode *ino { invalidate_inode_buffers(inode); clear_inode(inode); - dquot_drop(inode); ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); + dquot_drop(inode); if (EXT4_I(inode)->jinode) { jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), EXT4_I(inode)->jinode); @@@ -1345,18 -1345,6 +1345,18 @@@ static bool ext4_dummy_context(struct i return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); }
+static bool ext4_has_stable_inodes(struct super_block *sb) +{ + return ext4_has_feature_stable_inodes(sb); +} + +static void ext4_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); + *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); +} + static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, @@@ -1364,8 -1352,6 +1364,8 @@@ .dummy_context = ext4_dummy_context, .empty_dir = ext4_empty_dir, .max_namelen = EXT4_NAME_LEN, + .has_stable_inodes = ext4_has_stable_inodes, + .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, }; #endif
@@@ -1388,6 -1374,7 +1388,6 @@@ static ssize_t ext4_quota_write(struct static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags); static int ext4_enable_quotas(struct super_block *sb); -static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
static struct dquot **ext4_get_dquots(struct inode *inode) { @@@ -1405,7 -1392,7 +1405,7 @@@ static const struct dquot_operations ex .destroy_dquot = dquot_destroy, .get_projid = ext4_get_projid, .get_inode_usage = ext4_get_inode_usage, - .get_next_id = ext4_get_next_id, + .get_next_id = dquot_get_next_id, };
static const struct quotactl_ops ext4_qctl_operations = { @@@ -2064,7 -2051,7 +2064,7 @@@ static int parse_options(char *options unsigned int *journal_ioprio, int is_remount) { - struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb); char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name; substring_t args[MAX_OPT_ARGS]; int token; @@@ -2118,6 -2105,16 +2118,6 @@@ } } #endif - if (test_opt(sb, DIOREAD_NOLOCK)) { - int blocksize = - BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - - if (blocksize < PAGE_SIZE) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "dioread_nolock if block size != PAGE_SIZE"); - return 0; - } - } return 1; }
@@@ -3558,15 -3555,12 +3558,15 @@@ static void ext4_clamp_want_extra_isize { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + unsigned def_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE;
- /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && - sbi->s_want_extra_isize == 0) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; + if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = 0; + return; + } + if (sbi->s_want_extra_isize < 4) { + sbi->s_want_extra_isize = def_extra_isize; if (ext4_has_feature_extra_isize(sb)) { if (sbi->s_want_extra_isize < le16_to_cpu(es->s_want_extra_isize)) @@@ -3579,10 -3573,10 +3579,10 @@@ } } /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; + if ((sbi->s_want_extra_isize > sbi->s_inode_size) || + (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size)) { + sbi->s_want_extra_isize = def_extra_isize; ext4_msg(sb, KERN_INFO, "required extra inode space not available"); } @@@ -4445,6 -4439,13 +4445,6 @@@ no_journal } }
- if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && - (blocksize != PAGE_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Unsupported blocksize for fs encryption"); - goto failed_mount_wq; - } - if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); goto failed_mount_wq; @@@ -5834,7 -5835,7 +5834,7 @@@ static int ext4_quota_enable(struct sup /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); - err = dquot_enable(qf_inode, type, format_id, flags); + err = dquot_load_quota_inode(qf_inode, type, format_id, flags); if (err) lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); iput(qf_inode); @@@ -6018,6 -6019,18 +6018,6 @@@ out } return len; } - -static int ext4_get_next_id(struct super_block *sb, struct kqid *qid) -{ - const struct quota_format_ops *ops; - - if (!sb_has_quota_loaded(sb, qid->type)) - return -ESRCH; - ops = sb_dqopt(sb)->ops[qid->type]; - if (!ops || !ops->get_next_id) - return -ENOSYS; - return dquot_get_next_id(sb, qid); -} #endif
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, diff --combined fs/fuse/virtio_fs.c index bade74768903,13f67ae37257..be36b69de9ca --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@@ -30,12 -30,10 +30,12 @@@ struct virtio_fs_vq struct virtqueue *vq; /* protected by ->lock */ struct work_struct done_work; struct list_head queued_reqs; + struct list_head end_reqs; /* End these requests */ struct delayed_work dispatch_work; struct fuse_dev *fud; bool connected; long in_flight; + struct completion in_flight_zero; /* No inflight requests */ char name[24]; } ____cacheline_aligned_in_smp;
@@@ -49,20 -47,13 +49,20 @@@ struct virtio_fs unsigned int num_request_queues; /* number of request queues */ };
-struct virtio_fs_forget { +struct virtio_fs_forget_req { struct fuse_in_header ih; struct fuse_forget_in arg; +}; + +struct virtio_fs_forget { /* This request can be temporarily queued on virt queue */ struct list_head list; + struct virtio_fs_forget_req req; };
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, + struct fuse_req *req, bool in_flight); + static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) { struct virtio_fs *fs = vq->vdev->priv; @@@ -75,21 -66,6 +75,21 @@@ static inline struct fuse_pqueue *vq_to return &vq_to_fsvq(vq)->fud->pq; }
+/* Should be called with fsvq->lock held. */ +static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) +{ + fsvq->in_flight++; +} + +/* Should be called with fsvq->lock held. */ +static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight <= 0); + fsvq->in_flight--; + if (!fsvq->in_flight) + complete(&fsvq->in_flight_zero); +} + static void release_virtio_fs_obj(struct kref *ref) { struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); @@@ -118,46 -94,51 +118,46 @@@ static void virtio_fs_drain_queue(struc WARN_ON(fsvq->in_flight < 0);
/* Wait for in flight requests to finish. */ - while (1) { - spin_lock(&fsvq->lock); - if (!fsvq->in_flight) { - spin_unlock(&fsvq->lock); - break; - } + spin_lock(&fsvq->lock); + if (fsvq->in_flight) { + /* We are holding virtio_fs_mutex. There should not be any + * waiters waiting for completion. + */ + reinit_completion(&fsvq->in_flight_zero); + spin_unlock(&fsvq->lock); + wait_for_completion(&fsvq->in_flight_zero); + } else { spin_unlock(&fsvq->lock); - /* TODO use completion instead of timeout */ - usleep_range(1000, 2000); }
flush_work(&fsvq->done_work); flush_delayed_work(&fsvq->dispatch_work); }
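This hunk resolves the old "TODO use completion instead of timeout": the drain path now sleeps on in_flight_zero, which dec_in_flight_req() completes when the counter reaches zero, instead of polling with usleep_range(). A userspace analogue with a pthread condition variable standing in for struct completion:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t in_flight_zero = PTHREAD_COND_INITIALIZER;
    static long in_flight;

    static void inc_in_flight_req(void)
    {
            pthread_mutex_lock(&lock);
            in_flight++;
            pthread_mutex_unlock(&lock);
    }

    static void *complete_one(void *arg)
    {
            pthread_mutex_lock(&lock);
            if (--in_flight == 0)
                    pthread_cond_broadcast(&in_flight_zero); /* complete() */
            pthread_mutex_unlock(&lock);
            return arg;
    }

    static void drain_queue(void)
    {
            pthread_mutex_lock(&lock);
            while (in_flight)       /* wait_for_completion() in the driver */
                    pthread_cond_wait(&in_flight_zero, &lock);
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            pthread_t t;

            inc_in_flight_req();
            pthread_create(&t, NULL, complete_one, NULL);
            drain_queue();
            pthread_join(t, NULL);
            puts("drained");
            return 0;
    }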
-static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) -{ - struct virtio_fs_forget *forget; - - spin_lock(&fsvq->lock); - while (1) { - forget = list_first_entry_or_null(&fsvq->queued_reqs, - struct virtio_fs_forget, list); - if (!forget) - break; - list_del(&forget->list); - kfree(forget); - } - spin_unlock(&fsvq->lock); -} - -static void virtio_fs_drain_all_queues(struct virtio_fs *fs) +static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; int i;
for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; - if (i == VQ_HIPRIO) - drain_hiprio_queued_reqs(fsvq); - virtio_fs_drain_queue(fsvq); } }
+static void virtio_fs_drain_all_queues(struct virtio_fs *fs) +{ + /* Provides mutual exclusion between ->remove and ->kill_sb + * paths. We don't want both of these draining queue at the + * same time. Current completion logic reinits completion + * and that means there should not be any other thread + * doing reinit or waiting for completion already. + */ + mutex_lock(&virtio_fs_mutex); + virtio_fs_drain_all_queues_locked(fs); + mutex_unlock(&virtio_fs_mutex); +} + static void virtio_fs_start_all_queues(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; @@@ -234,7 -215,7 +234,7 @@@ static void virtio_fs_free_devs(struct /* Read filesystem name from virtio config into fs->tag (must kfree()). */ static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) { - char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; + char tag_buf[sizeof_member(struct virtio_fs_config, tag)]; char *end; size_t len;
@@@ -272,148 -253,74 +272,148 @@@ static void virtio_fs_hiprio_done_work(
while ((req = virtqueue_get_buf(vq, &len)) != NULL) { kfree(req); - fsvq->in_flight--; + dec_in_flight_req(fsvq); } } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); spin_unlock(&fsvq->lock); }
-static void virtio_fs_dummy_dispatch_work(struct work_struct *work) +static void virtio_fs_request_dispatch_work(struct work_struct *work) { -} - -static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) -{ - struct virtio_fs_forget *forget; + struct fuse_req *req; struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, dispatch_work.work); - struct virtqueue *vq = fsvq->vq; - struct scatterlist sg; - struct scatterlist *sgs[] = {&sg}; - bool notify; + struct fuse_conn *fc = fsvq->fud->fc; int ret;
pr_debug("virtio-fs: worker %s called.\n", __func__); while (1) { spin_lock(&fsvq->lock); - forget = list_first_entry_or_null(&fsvq->queued_reqs, - struct virtio_fs_forget, list); - if (!forget) { + req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, + list); + if (!req) { spin_unlock(&fsvq->lock); - return; + break; }
- list_del(&forget->list); - if (!fsvq->connected) { + list_del_init(&req->list); + spin_unlock(&fsvq->lock); + fuse_request_end(fc, req); + } + + /* Dispatch pending requests */ + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->queued_reqs, + struct fuse_req, list); + if (!req) { spin_unlock(&fsvq->lock); - kfree(forget); - continue; + return; } + list_del_init(&req->list); + spin_unlock(&fsvq->lock);
- sg_init_one(&sg, forget, sizeof(*forget)); - - /* Enqueue the request */ - dev_dbg(&vq->vdev->dev, "%s\n", __func__); - ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); + ret = virtio_fs_enqueue_req(fsvq, req, true); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { - pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", - ret); - list_add_tail(&forget->list, - &fsvq->queued_reqs); + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); schedule_delayed_work(&fsvq->dispatch_work, - msecs_to_jiffies(1)); - } else { - pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", - ret); - kfree(forget); + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; } + req->out.h.error = ret; + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + spin_unlock(&fsvq->lock); + pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", + ret); + fuse_request_end(fc, req); + } + } +} + +/* + * Returns 1 if queue is full and sender should wait a bit before sending + * next request, 0 otherwise. + */ +static int send_forget_request(struct virtio_fs_vq *fsvq, + struct virtio_fs_forget *forget, + bool in_flight) +{ + struct scatterlist sg; + struct virtqueue *vq; + int ret = 0; + bool notify; + struct virtio_fs_forget_req *req = &forget->req; + + spin_lock(&fsvq->lock); + if (!fsvq->connected) { + if (in_flight) + dec_in_flight_req(fsvq); + kfree(forget); + goto out; + } + + sg_init_one(&sg, req, sizeof(*req)); + vq = fsvq->vq; + dev_dbg(&vq->vdev->dev, "%s\n", __func__); + + ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", + ret); + list_add_tail(&forget->list, &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + if (!in_flight) + inc_in_flight_req(fsvq); + /* Queue is full */ + ret = 1; + } else { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", + ret); + kfree(forget); + if (in_flight) + dec_in_flight_req(fsvq); + } + goto out; + } + + if (!in_flight) + inc_in_flight_req(fsvq); + notify = virtqueue_kick_prepare(vq); + spin_unlock(&fsvq->lock); + + if (notify) + virtqueue_notify(vq); + return ret; +out: + spin_unlock(&fsvq->lock); + return ret; +} + +static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) +{ + struct virtio_fs_forget *forget; + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + dispatch_work.work); + pr_debug("virtio-fs: worker %s called.\n", __func__); + while (1) { + spin_lock(&fsvq->lock); + forget = list_first_entry_or_null(&fsvq->queued_reqs, + struct virtio_fs_forget, list); + if (!forget) { spin_unlock(&fsvq->lock); return; }
- fsvq->in_flight++; - notify = virtqueue_kick_prepare(vq); + list_del(&forget->list); spin_unlock(&fsvq->lock); - - if (notify) - virtqueue_notify(vq); - pr_debug("virtio-fs: worker %s dispatched one forget request.\n", - __func__); + if (send_forget_request(fsvq, forget, true)) + return; } }
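send_forget_request() centralizes the queue-full policy: on -ENOSPC or -ENOMEM the forget is parked on queued_reqs and a delayed worker retries it, so it is never silently dropped. A toy sketch of that try/park/retry shape:

    #include <errno.h>
    #include <stdio.h>

    #define QUEUE_DEPTH 2

    static int queue[QUEUE_DEPTH];
    static int queued;

    /* Stand-in for virtqueue_add_outbuf(): -ENOSPC when the ring is full. */
    static int try_enqueue(int req)
    {
            if (queued == QUEUE_DEPTH)
                    return -ENOSPC;
            queue[queued++] = req;
            return 0;
    }

    static void send_request(int req)
    {
            int ret = try_enqueue(req);

            if (ret == -ENOSPC || ret == -ENOMEM) {
                    /* Park it for a (delayed) worker, as the driver does
                     * via dispatch_work, instead of dropping it. */
                    printf("req %d: ring full, deferred\n", req);
                    return;
            }
            printf("req %d: queued\n", req);
    }

    int main(void)
    {
            for (int i = 0; i < 3; i++)
                    send_request(i);
            return 0;
    }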
@@@ -545,7 -452,7 +545,7 @@@ static void virtio_fs_requests_done_wor
fuse_request_end(fc, req); spin_lock(&fsvq->lock); - fsvq->in_flight--; + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); } } @@@ -595,10 -502,8 +595,10 @@@ static int virtio_fs_setup_vqs(struct v names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, virtio_fs_hiprio_dispatch_work); + init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero); spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
/* Initialize the requests virtqueues */ @@@ -606,10 -511,8 +606,10 @@@ spin_lock_init(&fs->vqs[i].lock); INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, - virtio_fs_dummy_dispatch_work); + virtio_fs_request_dispatch_work); INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[i].end_reqs); + init_completion(&fs->vqs[i].in_flight_zero); snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), "requests.%u", i - VQ_REQUEST); callbacks[i] = virtio_fs_vq_done; @@@ -703,7 -606,7 +703,7 @@@ static void virtio_fs_remove(struct vir /* This device is going away. No one should get new reference */ list_del_init(&fs->list); virtio_fs_stop_all_queues(fs); - virtio_fs_drain_all_queues(fs); + virtio_fs_drain_all_queues_locked(fs); vdev->config->reset(vdev); virtio_fs_cleanup_vqs(vdev, fs);
@@@ -728,12 -631,12 +728,12 @@@ static int virtio_fs_restore(struct vir } #endif /* CONFIG_PM_SLEEP */
-const static struct virtio_device_id id_table[] = { +static const struct virtio_device_id id_table[] = { { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, {}, };
-const static unsigned int feature_table[] = {}; +static const unsigned int feature_table[] = {};
static struct virtio_driver virtio_fs_driver = { .driver.name = KBUILD_MODNAME, @@@ -754,10 -657,14 +754,10 @@@ __releases(fiq->lock { struct fuse_forget_link *link; struct virtio_fs_forget *forget; - struct scatterlist sg; - struct scatterlist *sgs[] = {&sg}; + struct virtio_fs_forget_req *req; struct virtio_fs *fs; - struct virtqueue *vq; struct virtio_fs_vq *fsvq; - bool notify; u64 unique; - int ret;
link = fuse_dequeue_forget(fiq, 1, NULL); unique = fuse_get_unique(fiq); @@@ -768,19 -675,56 +768,19 @@@
/* Allocate a buffer for the request */ forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); + req = &forget->req;
- forget->ih = (struct fuse_in_header){ + req->ih = (struct fuse_in_header){ .opcode = FUSE_FORGET, .nodeid = link->forget_one.nodeid, .unique = unique, - .len = sizeof(*forget), + .len = sizeof(*req), }; - forget->arg = (struct fuse_forget_in){ + req->arg = (struct fuse_forget_in){ .nlookup = link->forget_one.nlookup, };
- sg_init_one(&sg, forget, sizeof(*forget)); - - /* Enqueue the request */ - spin_lock(&fsvq->lock); - - if (!fsvq->connected) { - kfree(forget); - spin_unlock(&fsvq->lock); - goto out; - } - - vq = fsvq->vq; - dev_dbg(&vq->vdev->dev, "%s\n", __func__); - - ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); - if (ret < 0) { - if (ret == -ENOMEM || ret == -ENOSPC) { - pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n", - ret); - list_add_tail(&forget->list, &fsvq->queued_reqs); - schedule_delayed_work(&fsvq->dispatch_work, - msecs_to_jiffies(1)); - } else { - pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", - ret); - kfree(forget); - } - spin_unlock(&fsvq->lock); - goto out; - } - - fsvq->in_flight++; - notify = virtqueue_kick_prepare(vq); - - spin_unlock(&fsvq->lock); - - if (notify) - virtqueue_notify(vq); -out: + send_forget_request(fsvq, forget, false); kfree(link); }
@@@ -875,7 -819,7 +875,7 @@@ static unsigned int sg_init_fuse_args(s
/* Add a request to a virtqueue and kick the device */ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, - struct fuse_req *req) + struct fuse_req *req, bool in_flight) { /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; @@@ -891,7 -835,6 +891,7 @@@ unsigned int i; int ret; bool notify; + struct fuse_pqueue *fpq;
/* Does the sglist fit on the stack? */ total_sgs = sg_count_fuse_req(req); @@@ -946,17 -889,7 +946,17 @@@ goto out; }
- fsvq->in_flight++; + /* Request successfully sent. */ + fpq = &fsvq->fud->pq; + spin_lock(&fpq->lock); + list_add_tail(&req->list, fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + + if (!in_flight) + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock); @@@ -982,8 -915,9 +982,8 @@@ __releases(fiq->lock { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ struct virtio_fs *fs; - struct fuse_conn *fc; struct fuse_req *req; - struct fuse_pqueue *fpq; + struct virtio_fs_vq *fsvq; int ret;
WARN_ON(list_empty(&fiq->pending)); @@@ -994,41 -928,49 +994,41 @@@ spin_unlock(&fiq->lock);
fs = fiq->priv; - fc = fs->vqs[queue_id].fud->fc;
pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", __func__, req->in.h.opcode, req->in.h.unique, req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args));
- fpq = &fs->vqs[queue_id].fud->pq; - spin_lock(&fpq->lock); - if (!fpq->connected) { - spin_unlock(&fpq->lock); - req->out.h.error = -ENODEV; - pr_err("virtio-fs: %s disconnected\n", __func__); - fuse_request_end(fc, req); - return; - } - list_add_tail(&req->list, fpq->processing); - spin_unlock(&fpq->lock); - set_bit(FR_SENT, &req->flags); - /* matches barrier in request_wait_answer() */ - smp_mb__after_atomic(); - -retry: - ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); + fsvq = &fs->vqs[queue_id]; + ret = virtio_fs_enqueue_req(fsvq, req, false); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { - /* Virtqueue full. Retry submission */ - /* TODO use completion instead of timeout */ - usleep_range(20, 30); - goto retry; + /* + * Virtqueue full. Retry submission from worker + * context as we might be holding fc->bg_lock. + */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + inc_in_flight_req(fsvq); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); - spin_lock(&fpq->lock); - clear_bit(FR_SENT, &req->flags); - list_del_init(&req->list); - spin_unlock(&fpq->lock); - fuse_request_end(fc, req); + + /* Can't end request in submission context. Use a worker */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->end_reqs); + schedule_delayed_work(&fsvq->dispatch_work, 0); + spin_unlock(&fsvq->lock); return; } }
-const static struct fuse_iqueue_ops virtio_fs_fiq_ops = { +static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, @@@ -1050,7 -992,6 +1050,7 @@@ static int virtio_fs_fill_super(struct .destroy = true, .no_control = true, .no_force_umount = true, + .no_mount_options = true, };
mutex_lock(&virtio_fs_mutex); diff --combined include/linux/filter.h index a141cb07e76a,fa2d921d7a22..90fde277b8e0 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@@ -65,9 -65,6 +65,9 @@@ struct ctl_table_header /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0
+/* unused opcode to mark special load instruction. Same as BPF_ABS */ +#define BPF_PROBE_MEM 0x20 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0
@@@ -420,7 -417,7 +420,7 @@@ static inline bool insn_is_zext(const s
#define BPF_FIELD_SIZEOF(type, field) \ ({ \ - const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + const int __size = bytes_to_bpf_size(sizeof_member(type, field)); \ BUILD_BUG_ON(__size < 0); \ __size; \ }) @@@ -467,11 -464,10 +467,11 @@@ #define BPF_CALL_x(x, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ - return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) @@@ -497,7 -493,7 +497,7 @@@
#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ - BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ + BUILD_BUG_ON(sizeof_member(TYPE, MEMBER) != (SIZE)); \ *(PTR_SIZE) = (SIZE); \ offsetof(TYPE, MEMBER); \ }) @@@ -515,12 -511,10 +515,12 @@@ struct sock_fprog_kern struct sock_filter *filter; };
+/* Some arches need doubleword alignment for their instructions and/or data */ +#define BPF_IMAGE_ALIGNMENT 8 + struct bpf_binary_header { u32 pages; - /* Some arches need word alignment for their instructions */ - u8 image[] __aligned(4); + u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); };
struct bpf_prog { @@@ -608,7 -602,7 +608,7 @@@ static inline void bpf_compute_data_poi { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
- BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); + BUILD_BUG_ON(sizeof(*cb) > sizeof_member(struct sk_buff, cb)); cb->data_meta = skb->data - skb_metadata_len(skb); cb->data_end = skb->data + skb_headlen(skb); } @@@ -646,9 -640,9 +646,9 @@@ static inline u8 *bpf_skb_cb(struct sk_ * attached to sockets, we need to clear the bpf_skb_cb() area * to not leak previous contents to user space. */ - BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN); - BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != - FIELD_SIZEOF(struct qdisc_skb_cb, data)); + BUILD_BUG_ON(sizeof_member(struct __sk_buff, cb) != BPF_SKB_CB_LEN); + BUILD_BUG_ON(sizeof_member(struct __sk_buff, cb) != + sizeof_member(struct qdisc_skb_cb, data));
return qdisc_skb_cb(skb)->data; } @@@ -776,12 -770,8 +776,12 @@@ bpf_ctx_narrow_access_offset(u32 off, u
static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - set_vm_flush_reset_perms(fp); - set_memory_ro((unsigned long)fp, fp->pages); +#ifndef CONFIG_BPF_JIT_ALWAYS_ON + if (!fp->jited) { + set_vm_flush_reset_perms(fp); + set_memory_ro((unsigned long)fp, fp->pages); + } +#endif }
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) @@@ -956,9 -946,6 +956,9 @@@ void *bpf_jit_alloc_exec(unsigned long void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp);
+int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, + struct bpf_jit_poke_descriptor *poke); + int bpf_jit_get_func_addr(const struct bpf_prog *prog, const struct bpf_insn *insn, bool extra_pass, u64 *func_addr, bool *func_addr_fixed); @@@ -1057,23 -1044,11 +1057,23 @@@ static inline bool ebpf_jit_enabled(voi return false; }
+static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) +{ + return false; +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; }
+static inline int +bpf_jit_add_poke_descriptor(struct bpf_prog *prog, + struct bpf_jit_poke_descriptor *poke) +{ + return -ENOTSUPP; +} + static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); @@@ -1124,6 -1099,7 +1124,6 @@@ static inline void bpf_get_prog_name(co
#endif /* CONFIG_BPF_JIT */
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
#define BPF_ANC BIT(15) diff --combined include/linux/kvm_host.h index 7ed1e2f8641e,ebbf4f1a221a..6c626d5455cc --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@@ -149,7 -149,7 +149,7 @@@ static inline bool is_error_page(struc #define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ - BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ + BUILD_BUG_ON((unsigned)(nr) >= (sizeof_member(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ }) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) @@@ -266,8 -266,7 +266,8 @@@ struct kvm_vcpu struct preempt_notifier preempt_notifier; #endif int cpu; - int vcpu_id; + int vcpu_id; /* id given by userspace at creation */ + int vcpu_idx; /* index in kvm->vcpus array */ int srcu_idx; int mode; u64 requests; @@@ -279,6 -278,7 +279,6 @@@ struct mutex mutex; struct kvm_run *run;
- int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; @@@ -571,7 -571,13 +571,7 @@@ static inline struct kvm_vcpu *kvm_get_
static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *tmp; - int idx; - - kvm_for_each_vcpu(idx, tmp, vcpu->kvm) - if (tmp == vcpu) - return idx; - BUG(); + return vcpu->vcpu_idx; }
#define kvm_for_each_memslot(memslot, slots) \ @@@ -616,7 -622,6 +616,7 @@@ void kvm_exit(void)
void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); +void kvm_put_kvm_no_destroy(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { @@@ -741,28 -746,6 +741,28 @@@ int kvm_write_guest_offset_cached(struc unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); + +#define __kvm_put_guest(kvm, gfn, offset, value, type) \ +({ \ + unsigned long __addr = gfn_to_hva(kvm, gfn); \ + type __user *__uaddr = (type __user *)(__addr + offset); \ + int __ret = -EFAULT; \ + \ + if (!kvm_is_error_hva(__addr)) \ + __ret = put_user(value, __uaddr); \ + if (!__ret) \ + mark_page_dirty(kvm, gfn); \ + __ret; \ +}) + +#define kvm_put_guest(kvm, gpa, value, type) \ +({ \ + gpa_t __gpa = gpa; \ + struct kvm *__kvm = kvm; \ + __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ + offset_in_page(__gpa), (value), type); \ +}) + int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); @@@ -808,8 -791,6 +808,8 @@@ void kvm_reload_remote_mmus(struct kvm bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap, cpumask_var_t tmp); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); +bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, + unsigned long *vcpu_bitmap);
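For context, the new kvm_put_guest() above is meant for one-off stores into guest memory that must also dirty-log the page. A hedged usage sketch (the function name and the source of the gpa are invented for illustration):

static int kvm_post_event_token(struct kvm *kvm, gpa_t gpa, u32 token)
{
	/*
	 * Expands to gfn_to_hva() + put_user(), and marks the page
	 * dirty only when the store succeeded; -EFAULT otherwise.
	 */
	return kvm_put_guest(kvm, gpa, token, u32);
}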
long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@@ -985,7 -966,6 +985,7 @@@ int kvm_cpu_has_pending_timer(struct kv void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn); +bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier { struct hlist_node link; @@@ -1260,7 -1240,7 +1260,7 @@@ extern unsigned int halt_poll_ns_grow_s extern unsigned int halt_poll_ns_shrink;
struct kvm_device { - struct kvm_device_ops *ops; + const struct kvm_device_ops *ops; struct kvm *kvm; void *private; struct list_head vm_node; @@@ -1313,7 -1293,7 +1313,7 @@@ struct kvm_device_ops void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); -int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); +int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops; @@@ -1402,10 -1382,4 +1402,10 @@@ static inline int kvm_arch_vcpu_run_pid } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
+typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr); + #endif diff --combined include/linux/slab.h index 877a95c6a2d2,2afb630bce2a..270f03ee3f83 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@@ -179,7 -179,7 +179,7 @@@ void memcg_deactivate_kmem_caches(struc sizeof(struct __struct), \ __alignof__(struct __struct), (__flags), \ offsetof(struct __struct, __field), \ - sizeof_field(struct __struct, __field), NULL) + sizeof_member(struct __struct, __field), NULL)
/*
 * Common kmalloc functions provided by all allocators
@@@ -493,10 -489,6 +493,10 @@@ static __always_inline void *kmalloc_la
  * kmalloc is the normal method of allocating memory
  * for objects smaller than page size in the kernel.
  *
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. When @size is a power of two, the alignment is also guaranteed
+ * to be at least @size.
+ *
  * The @flags argument may be one of the GFP flags defined at
  * include/linux/gfp.h and described at
  * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
@@@ -561,6 -557,26 +561,6 @@@ static __always_inline void *kmalloc(si
 	return __kmalloc(size, flags);
 }
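The alignment wording added above has a practical consequence worth spelling out: power-of-two allocations can be handed to hardware with natural-alignment requirements without a special allocator. A minimal sketch (the caller is hypothetical):

static void *alloc_ring_buffer(void)
{
	/* 4096 is a power of two, so the pointer is 4096-byte aligned
	 * per the guarantee documented in the kmalloc() comment above. */
	void *ring = kmalloc(4096, GFP_KERNEL);

	WARN_ON(ring && !IS_ALIGNED((unsigned long)ring, 4096));
	return ring;
}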
-/* - * Determine size used for the nth kmalloc cache. - * return size or 0 if a kmalloc cache for that - * size does not exist - */ -static __always_inline unsigned int kmalloc_size(unsigned int n) -{ -#ifndef CONFIG_SLOB - if (n > 2) - return 1U << n; - - if (n == 1 && KMALLOC_MIN_SIZE <= 32) - return 96; - - if (n == 2 && KMALLOC_MIN_SIZE <= 64) - return 192; -#endif - return 0; -} - static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB diff --combined include/net/sock.h index 87d54ef57f00,6a5a386410ff..d036017e6b8b --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -66,6 -66,7 +66,6 @@@ #include <net/checksum.h> #include <net/tcp_states.h> #include <linux/net_tstamp.h> -#include <net/smc.h> #include <net/l3mdev.h>
/* @@@ -859,17 -860,17 +859,17 @@@ static inline gfp_t sk_gfp_mask(const s
static inline void sk_acceptq_removed(struct sock *sk) { - sk->sk_ack_backlog--; + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1); }
static inline void sk_acceptq_added(struct sock *sk) { - sk->sk_ack_backlog++; + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); }
static inline bool sk_acceptq_is_full(const struct sock *sk) { - return sk->sk_ack_backlog > sk->sk_max_ack_backlog; + return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); }
/* @@@ -877,17 -878,12 +877,17 @@@ */ static inline int sk_stream_min_wspace(const struct sock *sk) { - return sk->sk_wmem_queued >> 1; + return READ_ONCE(sk->sk_wmem_queued) >> 1; }
static inline int sk_stream_wspace(const struct sock *sk) { - return sk->sk_sndbuf - sk->sk_wmem_queued; + return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); +} + +static inline void sk_wmem_queued_add(struct sock *sk, int val) +{ + WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); }
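All of the sock.h hunks in this file follow one pattern: fields like sk_ack_backlog, sk_wmem_queued and sk_sndbuf are read without the socket lock (from poll handlers and the like), so writes gain WRITE_ONCE() and lockless reads gain READ_ONCE() to prevent compiler tearing and refetching. A minimal sketch of the pattern, with invented names:

/* Writer side: runs under the socket lock. */
static inline void demo_charge(struct sock *sk, int val)
{
	/* The plain read is fine (lock held); the store is annotated
	 * because lockless readers can observe it concurrently. */
	WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}

/* Reader side: may run without the lock, e.g. from poll(). */
static inline bool demo_over_limit(const struct sock *sk)
{
	return READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf);
}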
void sk_stream_write_space(struct sock *sk); @@@ -899,11 -895,11 +899,11 @@@ static inline void __sk_add_backlog(str skb_dst_force(skb);
if (!sk->sk_backlog.tail) - sk->sk_backlog.head = skb; + WRITE_ONCE(sk->sk_backlog.head, skb); else sk->sk_backlog.tail->next = skb;
- sk->sk_backlog.tail = skb; + WRITE_ONCE(sk->sk_backlog.tail, skb); skb->next = NULL; }
@@@ -953,8 -949,8 +953,8 @@@ static inline void sk_incoming_cpu_upda { int cpu = raw_smp_processor_id();
- if (unlikely(sk->sk_incoming_cpu != cpu)) - sk->sk_incoming_cpu = cpu; + if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) + WRITE_ONCE(sk->sk_incoming_cpu, cpu); }
static inline void sock_rps_record_flow_hash(__u32 hash) @@@ -1211,7 -1207,7 +1211,7 @@@ static inline void sk_refcnt_debug_rele
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { - if (sk->sk_wmem_queued >= sk->sk_sndbuf) + if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false;
return sk->sk_prot->stream_memory_free ? @@@ -1471,7 -1467,7 +1471,7 @@@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cac static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; + sk_wmem_queued_add(sk, -skb->truesize); sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && !sk->sk_tx_skb_cache && !skb_cloned(skb)) { @@@ -1488,7 -1484,7 +1488,7 @@@ static inline void sock_release_ownersh sk->sk_lock.owned = 0;
/* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } }
@@@ -1939,8 -1935,8 +1939,8 @@@ struct dst_entry *sk_dst_check(struct s
static inline void sk_dst_confirm(struct sock *sk) { - if (!sk->sk_dst_pending_confirm) - sk->sk_dst_pending_confirm = 1; + if (!READ_ONCE(sk->sk_dst_pending_confirm)) + WRITE_ONCE(sk->sk_dst_pending_confirm, 1); }
static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) @@@ -1950,10 -1946,10 +1950,10 @@@ unsigned long now = jiffies;
/* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - if (sk && sk->sk_dst_pending_confirm) - sk->sk_dst_pending_confirm = 0; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } }
@@@ -2018,7 -2014,7 +2018,7 @@@ static inline int skb_copy_to_page_noca skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; } @@@ -2224,14 -2220,10 +2224,14 @@@ static inline void sk_wake_async(const
static inline void sk_stream_moderate_sndbuf(struct sock *sk) { - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) { - sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); - sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF); - } + u32 val; + + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + return; + + val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); + + WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); }
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
@@@ -2241,17 -2233,12 +2241,17 @@@
  * sk_page_frag - return an appropriate page_frag
  * @sk: socket
  *
- * If socket allocation mode allows current thread to sleep, it means its
- * safe to use the per task page_frag instead of the per socket one.
+ * Use the per task page_frag instead of the per socket one as an
+ * optimization when we know that we're in the normal context and own
+ * everything that's associated with %current.
+ *
+ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+ * inside other socket operations and end up recursing into sk_page_frag()
+ * while it's already in use.
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	if (gfpflags_allow_blocking(sk->sk_allocation))
+	if (gfpflags_normal_context(sk->sk_allocation))
 		return &current->task_frag;
return &sk->sk_frag; @@@ -2264,7 -2251,7 +2264,7 @@@ bool sk_page_frag_refill(struct sock *s */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); }
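For reference, gfpflags_normal_context() is stricter than gfpflags_allow_blocking(): it additionally rejects memalloc (reclaim) contexts, which is exactly the recursion case the comment above describes. A sketch of the distinction, based on my reading of include/linux/gfp.h around this merge (treat the flag logic as an assumption):

static inline bool demo_allow_blocking(gfp_t gfp)
{
	/* what sk_page_frag() keyed on before this change */
	return !!(gfp & __GFP_DIRECT_RECLAIM);
}

static inline bool demo_normal_context(gfp_t gfp)
{
	/* Blockable *and* not a memalloc/reclaim context, so it is
	 * safe to touch %current->task_frag. */
	return (gfp & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
	       __GFP_DIRECT_RECLAIM;
}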
static inline gfp_t gfp_any(void) @@@ -2284,9 -2271,7 +2284,9 @@@ static inline long sock_sndtimeo(const
static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) { - return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1; + int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len); + + return v ?: 1; }
/* Alas, with timeout socket operations are not restartable.
@@@ -2305,7 -2290,7 +2305,7 @@@ struct sock_skb_cb
  * using skb->cb[] would keep using it directly and utilize its
  * alignment guarantee.
  */
- #define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
+ #define SOCK_SKB_CB_OFFSET ((sizeof_member(struct sk_buff, cb) - \
			       sizeof(struct sock_skb_cb)))
#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ @@@ -2341,7 -2326,7 +2341,7 @@@ static inline ktime_t sock_read_timesta
return kt; #else - return sk->sk_stamp; + return READ_ONCE(sk->sk_stamp); #endif }
@@@ -2352,7 -2337,7 +2352,7 @@@ static inline void sock_write_timestamp sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else - sk->sk_stamp = kt; + WRITE_ONCE(sk->sk_stamp, kt); #endif }
@@@ -2527,7 -2512,7 +2527,7 @@@ static inline bool sk_listener(const st return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); }
-void sock_enable_timestamp(struct sock *sk, int flag); +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type);
diff --combined kernel/bpf/cgroup.c index 9f90d3c92bda,3ff6df3ab022..b0adc5e83638 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@@ -180,8 -180,8 +180,8 @@@ static void activate_effective_progs(st enum bpf_attach_type type, struct bpf_prog_array *old_array) { - rcu_swap_protected(cgrp->bpf.effective[type], old_array, - lockdep_is_held(&cgroup_mutex)); + old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array, + lockdep_is_held(&cgroup_mutex)); /* free prog array after grace period, since __cgroup_bpf_run_*() * might be still walking the array */ @@@ -1311,12 -1311,12 +1311,12 @@@ static bool sysctl_is_valid_access(int return false;
switch (off) { - case offsetof(struct bpf_sysctl, write): + case bpf_ctx_range(struct bpf_sysctl, write): if (type != BPF_READ) return false; bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); - case offsetof(struct bpf_sysctl, file_pos): + case bpf_ctx_range(struct bpf_sysctl, file_pos): if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); @@@ -1341,7 -1341,7 +1341,7 @@@ static u32 sysctl_convert_ctx_access(en *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sysctl_kern, write, - FIELD_SIZEOF(struct bpf_sysctl_kern, + sizeof_member(struct bpf_sysctl_kern, write), target_size)); break; diff --combined kernel/bpf/local_storage.c index 2ba750725cb2,ea4117fabdec..e13992995a05 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@@ -357,7 -357,7 +357,7 @@@ static int cgroup_storage_check_btf(con * The first field must be a 64 bit integer at 0 offset. */ m = (struct btf_member *)(key_type + 1); - size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id); + size = sizeof_member(struct bpf_cgroup_storage_key, cgroup_inode_id); if (!btf_member_is_reg_int(btf, key_type, m, 0, size)) return -EINVAL;
@@@ -366,7 -366,7 +366,7 @@@ */ m++; offset = offsetof(struct bpf_cgroup_storage_key, attach_type); - size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type); + size = sizeof_member(struct bpf_cgroup_storage_key, attach_type); if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) return -EINVAL;
@@@ -569,7 -569,7 +569,7 @@@ void bpf_cgroup_storage_link(struct bpf return;
storage->key.attach_type = type; - storage->key.cgroup_inode_id = cgroup->kn->id.id; + storage->key.cgroup_inode_id = cgroup_id(cgroup);
map = storage->map;
diff --combined kernel/fork.c index 2508a4f238a3,1b45b28d4bf5..f65ceece3459 --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -40,6 -40,7 +40,6 @@@ #include <linux/binfmts.h> #include <linux/mman.h> #include <linux/mmu_notifier.h> -#include <linux/hmm.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/vmacache.h> @@@ -93,7 -94,6 +93,7 @@@ #include <linux/livepatch.h> #include <linux/thread_info.h> #include <linux/stackleak.h> +#include <linux/kasan.h>
#include <asm/pgtable.h> #include <asm/pgalloc.h> @@@ -224,9 -224,6 +224,9 @@@ static unsigned long *alloc_thread_stac if (!s) continue;
+ /* Clear the KASAN shadow of the stack. */ + kasan_unpoison_shadow(s->addr, THREAD_SIZE); + /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE);
@@@ -1286,8 -1283,24 +1286,8 @@@ static int wait_for_vfork_done(struct t * restoring the old one. . . * Eric Biederman 10 January 1998 */ -void mm_release(struct task_struct *tsk, struct mm_struct *mm) +static void mm_release(struct task_struct *tsk, struct mm_struct *mm) { - /* Get rid of any futexes when releasing the mm */ -#ifdef CONFIG_FUTEX - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); - tsk->robust_list = NULL; - } -#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; - } -#endif - if (unlikely(!list_empty(&tsk->pi_state_list))) - exit_pi_state_list(tsk); -#endif - uprobe_free_utask(tsk);
/* Get rid of any cached register state */ @@@ -1320,18 -1333,6 +1320,18 @@@ complete_vfork_done(tsk); }
+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+	futex_exit_release(tsk);
+	mm_release(tsk, mm);
+}
+
+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+	futex_exec_release(tsk);
+	mm_release(tsk, mm);
+}
+
 /**
  * dup_mm() - duplicates an existing mm structure
  * @tsk: the task_struct with which the new mm will be associated.
@@@ -1516,11 -1517,6 +1516,11 @@@ static int copy_sighand(unsigned long c
 	spin_lock_irq(&current->sighand->siglock);
 	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
 	spin_unlock_irq(&current->sighand->siglock);
+
+	/* Reset all signal handlers not set to SIG_IGN to SIG_DFL. */
+	if (clone_flags & CLONE_CLEAR_SIGHAND)
+		flush_signal_handlers(tsk, 0);
+
 	return 0;
 }
@@@ -1699,68 -1695,12 +1699,68 @@@ static int pidfd_release(struct inode * }
 #ifdef CONFIG_PROC_FS
+/**
+ * pidfd_show_fdinfo - print information about a pidfd
+ * @m: proc fdinfo file
+ * @f: file referencing a pidfd
+ *
+ * Pid:
+ * This function will print the pid that a given pidfd refers to in the
+ * pid namespace of the procfs instance.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its pid. This is
+ * similar to calling getppid() on a process whose parent is outside of
+ * its pid namespace.
+ *
+ * NSpid:
+ * If pid namespaces are supported then this function will also print
+ * the pids a given pidfd refers to for all descendant pid namespaces
+ * starting from the current pid namespace of the instance, i.e. the
+ * Pid field and the first entry in the NSpid field will be identical.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its first NSpid
+ * entry and no others will be shown.
+ * Note that this differs from the Pid and NSpid fields in
+ * /proc/<pid>/status where Pid and NSpid are always shown relative to
+ * the pid namespace of the procfs instance. The difference becomes
+ * obvious when sending around a pidfd between pid namespaces from a
+ * different branch of the tree, i.e. where no ancestral relation is
+ * present between the pid namespaces:
+ * - create two new pid namespaces ns1 and ns2 in the initial pid
+ *   namespace (also take care to create new mount namespaces in the
+ *   new pid namespace and mount procfs)
+ * - create a process with a pidfd in ns1
+ * - send pidfd from ns1 to ns2
+ * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
+ *   have exactly one entry, which is 0
+ */
 static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
 {
-	struct pid_namespace *ns = proc_pid_ns(file_inode(m->file));
 	struct pid *pid = f->private_data;
+	struct pid_namespace *ns;
+	pid_t nr = -1;
+
+	if (likely(pid_has_task(pid, PIDTYPE_PID))) {
+		ns = proc_pid_ns(file_inode(m->file));
+		nr = pid_nr_ns(pid, ns);
+	}
+
+	seq_put_decimal_ll(m, "Pid:\t", nr);
+
+#ifdef CONFIG_PID_NS
+	seq_put_decimal_ll(m, "\nNSpid:\t", nr);
+	if (nr > 0) {
+		int i;
- seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); + /* If nr is non-zero it means that 'pid' is valid and that + * ns, i.e. the pid namespace associated with the procfs + * instance, is in the pid namespace hierarchy of pid. + * Start at one below the already printed level. + */ + for (i = ns->level + 1; i <= pid->level; i++) + seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); + } +#endif seq_putc(m, '\n'); } #endif @@@ -1768,11 -1708,11 +1768,11 @@@ /* * Poll support for process exit notification. */ -static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) +static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct task_struct *task; struct pid *pid = file->private_data; - int poll_flags = 0; + __poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
@@@ -1784,7 -1724,7 +1784,7 @@@ * group, then poll(2) should block, similar to the wait(2) family. */ if (!task || (task->exit_state && thread_group_empty(task))) - poll_flags = POLLIN | POLLRDNORM; + poll_flags = EPOLLIN | EPOLLRDNORM; rcu_read_unlock();
return poll_flags; @@@ -2086,8 -2026,7 +2086,8 @@@ static __latent_entropy struct task_str stackleak_task_init(p);
if (pid != &init_struct_pid) { - pid = alloc_pid(p->nsproxy->pid_ns_for_children); + pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, + args->set_tid_size); if (IS_ERR(pid)) { retval = PTR_ERR(pid); goto bad_fork_cleanup_thread; @@@ -2123,8 -2062,14 +2123,8 @@@ #ifdef CONFIG_BLOCK p->plug = NULL; #endif -#ifdef CONFIG_FUTEX - p->robust_list = NULL; -#ifdef CONFIG_COMPAT - p->compat_robust_list = NULL; -#endif - INIT_LIST_HEAD(&p->pi_state_list); - p->pi_state_cache = NULL; -#endif + futex_init_task(p); + /* * sigaltstack should be cleared when sharing the same VM */ @@@ -2185,7 -2130,7 +2185,7 @@@ */
p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boottime_ns(); + p->start_boottime = ktime_get_boottime_ns();
/* * Make it visible to the rest of the system, but dont wake it up yet. @@@ -2584,7 -2529,6 +2584,7 @@@ noinline static int copy_clone_args_fro { int err; struct clone_args args; + pid_t *kset_tid = kargs->set_tid;
if (unlikely(usize > PAGE_SIZE)) return -E2BIG; @@@ -2595,15 -2539,6 +2595,15 @@@ if (err) return err;
+ if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL)) + return -EINVAL; + + if (unlikely(!args.set_tid && args.set_tid_size > 0)) + return -EINVAL; + + if (unlikely(args.set_tid && args.set_tid_size == 0)) + return -EINVAL; + /* * Verify that higher 32bits of exit_signal are unset and that * it is a valid signal @@@ -2621,51 -2556,18 +2621,51 @@@ .stack = args.stack, .stack_size = args.stack_size, .tls = args.tls, + .set_tid_size = args.set_tid_size, };
+ if (args.set_tid && + copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), + (kargs->set_tid_size * sizeof(pid_t)))) + return -EFAULT; + + kargs->set_tid = kset_tid; + return 0; }
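The set_tid plumbing above exists so clone3() callers can request specific PIDs, one entry per pid-namespace level. A hedged userspace sketch (assumes kernel headers new enough to define the set_tid fields of struct clone_args, and CAP_SYS_ADMIN in the affected pid namespaces):

#define _GNU_SOURCE
#include <linux/sched.h>	/* struct clone_args */
#include <sys/syscall.h>
#include <sys/wait.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	pid_t want[1] = { 9999 };	/* desired PID in the current ns */
	struct clone_args args;
	pid_t pid;

	memset(&args, 0, sizeof(args));
	args.exit_signal = SIGCHLD;
	args.set_tid = (__u64)(unsigned long)want;
	args.set_tid_size = 1;		/* one pid-namespace level */

	pid = syscall(SYS_clone3, &args, sizeof(args));
	if (pid == 0) {
		printf("child pid: %d\n", getpid());	/* 9999 */
		_exit(0);
	}
	if (pid > 0)
		waitpid(pid, NULL, 0);
	return 0;
}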
-static bool clone3_args_valid(const struct kernel_clone_args *kargs) +/** + * clone3_stack_valid - check and prepare stack + * @kargs: kernel clone args + * + * Verify that the stack arguments userspace gave us are sane. + * In addition, set the stack direction for userspace since it's easy for us to + * determine. + */ +static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) { - /* - * All lower bits of the flag word are taken. - * Verify that no other unknown flags are passed along. - */ - if (kargs->flags & ~CLONE_LEGACY_FLAGS) + if (kargs->stack == 0) { + if (kargs->stack_size > 0) + return false; + } else { + if (kargs->stack_size == 0) + return false; + + if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) + return false; + +#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64) + kargs->stack += kargs->stack_size; +#endif + } + + return true; +} + +static bool clone3_args_valid(struct kernel_clone_args *kargs) +{ + /* Verify that no unknown flags are passed along. */ + if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND)) return false;
/* @@@ -2675,17 -2577,10 +2675,17 @@@ if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) return false;
+ if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == + (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) + return false; + if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && kargs->exit_signal) return false;
+ if (!clone3_stack_valid(kargs)) + return false; + return true; }
@@@ -2705,9 -2600,6 +2705,9 @@@ SYSCALL_DEFINE2(clone3, struct clone_ar int err;
struct kernel_clone_args kargs; + pid_t set_tid[MAX_PID_NS_LEVEL]; + + kargs.set_tid = set_tid;
err = copy_clone_args_from_user(&kargs, uargs, size); if (err) @@@ -2796,7 -2688,7 +2796,7 @@@ void __init proc_caches_init(void mm_size, ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, offsetof(struct mm_struct, saved_auxv), - sizeof_field(struct mm_struct, saved_auxv), + sizeof_member(struct mm_struct, saved_auxv), NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); @@@ -3033,7 -2925,7 +3033,7 @@@ int sysctl_max_threads(struct ctl_tabl struct ctl_table t; int ret; int threads = max_threads; - int min = MIN_THREADS; + int min = 1; int max = MAX_THREADS;
t = *table; @@@ -3045,7 -2937,7 +3045,7 @@@ if (ret || !write) return ret;
- set_max_threads(threads); + max_threads = threads;
return 0; } diff --combined kernel/signal.c index bcd46f547db3,52628a975705..4c1a171eb68f --- a/kernel/signal.c +++ b/kernel/signal.c @@@ -2205,8 -2205,8 +2205,8 @@@ static void ptrace_stop(int exit_code, */ preempt_disable(); read_unlock(&tasklist_lock); - preempt_enable_no_resched(); cgroup_enter_frozen(); + preempt_enable_no_resched(); freezable_schedule(); cgroup_leave_frozen(true); } else { @@@ -4548,11 -4548,11 +4548,11 @@@ static inline void siginfo_buildtime_ch BUILD_BUG_ON(offsetof(struct siginfo, si_pid) != offsetof(struct siginfo, si_addr)); if (sizeof(int) == sizeof(void __user *)) { - BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) != + BUILD_BUG_ON(sizeof_member(struct siginfo, si_pid) != sizeof(void __user *)); } else { - BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) + - sizeof_field(struct siginfo, si_uid)) != + BUILD_BUG_ON((sizeof_member(struct siginfo, si_pid) + + sizeof_member(struct siginfo, si_uid)) != sizeof(void __user *)); BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) != offsetof(struct siginfo, si_uid)); @@@ -4560,10 -4560,10 +4560,10 @@@ #ifdef CONFIG_COMPAT BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) != offsetof(struct compat_siginfo, si_addr)); - BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != + BUILD_BUG_ON(sizeof_member(struct compat_siginfo, si_pid) != sizeof(compat_uptr_t)); - BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != - sizeof_field(struct siginfo, si_pid)); + BUILD_BUG_ON(sizeof_member(struct compat_siginfo, si_pid) != + sizeof_member(struct siginfo, si_pid)); #endif }
diff --combined net/caif/caif_socket.c index ef14da50a981,0dbd93bb1939..61bd13d3d637 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@@ -953,7 -953,7 +953,7 @@@ static __poll_t caif_poll(struct file * mask |= EPOLLRDHUP;
/* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= EPOLLIN | EPOLLRDNORM;
@@@ -1033,7 -1033,7 +1033,7 @@@ static int caif_create(struct net *net .owner = THIS_MODULE, .obj_size = sizeof(struct caifsock), .useroffset = offsetof(struct caifsock, conn_req.param), - .usersize = sizeof_field(struct caifsock, conn_req.param) + .usersize = sizeof_member(struct caifsock, conn_req.param) };
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) diff --combined net/core/dev.c index 5326ae53982e,c434e94167ca..20a829b836ad --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -146,7 -146,6 +146,7 @@@ #include "net-sysfs.h"
#define MAX_GRO_SKBS 8 +#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@@ -229,122 -228,6 +229,122 @@@ static inline void rps_unlock(struct so #endif }
+static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev, + const char *name) +{ + struct netdev_name_node *name_node; + + name_node = kmalloc(sizeof(*name_node), GFP_KERNEL); + if (!name_node) + return NULL; + INIT_HLIST_NODE(&name_node->hlist); + name_node->dev = dev; + name_node->name = name; + return name_node; +} + +static struct netdev_name_node * +netdev_name_node_head_alloc(struct net_device *dev) +{ + struct netdev_name_node *name_node; + + name_node = netdev_name_node_alloc(dev, dev->name); + if (!name_node) + return NULL; + INIT_LIST_HEAD(&name_node->list); + return name_node; +} + +static void netdev_name_node_free(struct netdev_name_node *name_node) +{ + kfree(name_node); +} + +static void netdev_name_node_add(struct net *net, + struct netdev_name_node *name_node) +{ + hlist_add_head_rcu(&name_node->hlist, + dev_name_hash(net, name_node->name)); +} + +static void netdev_name_node_del(struct netdev_name_node *name_node) +{ + hlist_del_rcu(&name_node->hlist); +} + +static struct netdev_name_node *netdev_name_node_lookup(struct net *net, + const char *name) +{ + struct hlist_head *head = dev_name_hash(net, name); + struct netdev_name_node *name_node; + + hlist_for_each_entry(name_node, head, hlist) + if (!strcmp(name_node->name, name)) + return name_node; + return NULL; +} + +static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, + const char *name) +{ + struct hlist_head *head = dev_name_hash(net, name); + struct netdev_name_node *name_node; + + hlist_for_each_entry_rcu(name_node, head, hlist) + if (!strcmp(name_node->name, name)) + return name_node; + return NULL; +} + +int netdev_name_node_alt_create(struct net_device *dev, const char *name) +{ + struct netdev_name_node *name_node; + struct net *net = dev_net(dev); + + name_node = netdev_name_node_lookup(net, name); + if (name_node) + return -EEXIST; + name_node = netdev_name_node_alloc(dev, name); + if (!name_node) + return -ENOMEM; + netdev_name_node_add(net, name_node); + /* The node that holds dev->name acts as a head of per-device list. */ + list_add_tail(&name_node->list, &dev->name_node->list); + + return 0; +} +EXPORT_SYMBOL(netdev_name_node_alt_create); + +static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) +{ + list_del(&name_node->list); + netdev_name_node_del(name_node); + kfree(name_node->name); + netdev_name_node_free(name_node); +} + +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) +{ + struct netdev_name_node *name_node; + struct net *net = dev_net(dev); + + name_node = netdev_name_node_lookup(net, name); + if (!name_node) + return -ENOENT; + __netdev_name_node_alt_destroy(name_node); + + return 0; +} +EXPORT_SYMBOL(netdev_name_node_alt_destroy); + +static void netdev_name_node_alt_flush(struct net_device *dev) +{ + struct netdev_name_node *name_node, *tmp; + + list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) + __netdev_name_node_alt_destroy(name_node); +} + /* Device list insertion */ static void list_netdevice(struct net_device *dev) { @@@ -354,7 -237,7 +354,7 @@@
write_lock_bh(&dev_base_lock); list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); + netdev_name_node_add(net, dev->name_node); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); @@@ -372,7 -255,7 +372,7 @@@ static void unlist_netdevice(struct net /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); list_del_rcu(&dev->dev_list); - hlist_del_rcu(&dev->name_hlist); + netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock);
@@@ -393,6 -276,88 +393,6 @@@ static RAW_NOTIFIER_HEAD(netdev_chain) DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data);
-#ifdef CONFIG_LOCKDEP -/* - * register_netdevice() inits txq->_xmit_lock and sets lockdep class - * according to dev->type - */ -static const unsigned short netdev_lock_type[] = { - ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, - ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, - ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, - ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, - ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, - ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, - ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, - ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, - ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, - ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, - ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, - ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, - ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, - ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, - ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; - -static const char *const netdev_lock_name[] = { - "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", - "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", - "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", - "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", - "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", - "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", - "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", - "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", - "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", - "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", - "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", - "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", - "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", - "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; - -static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; -static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; - -static inline unsigned short netdev_lock_pos(unsigned short dev_type) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) - if (netdev_lock_type[i] == dev_type) - return i; - /* the last key is used by default */ - return ARRAY_SIZE(netdev_lock_type) - 1; -} - -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ - int i; - - i = netdev_lock_pos(dev_type); - lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], - netdev_lock_name[i]); -} - -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ - int i; - - i = netdev_lock_pos(dev->type); - lockdep_set_class_and_name(&dev->addr_list_lock, - &netdev_addr_lock_key[i], - netdev_lock_name[i]); -} -#else -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ -} -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ -} -#endif - /******************************************************************************* * * Protocol management and registration routines @@@ -768,10 -733,14 +768,10 @@@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst
struct net_device *__dev_get_by_name(struct net *net, const char *name) { - struct net_device *dev; - struct hlist_head *head = dev_name_hash(net, name); - - hlist_for_each_entry(dev, head, name_hlist) - if (!strncmp(dev->name, name, IFNAMSIZ)) - return dev; + struct netdev_name_node *node_name;
- return NULL; + node_name = netdev_name_node_lookup(net, name); + return node_name ? node_name->dev : NULL; } EXPORT_SYMBOL(__dev_get_by_name);
@@@ -789,10 -758,14 +789,10 @@@
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { - struct net_device *dev; - struct hlist_head *head = dev_name_hash(net, name); + struct netdev_name_node *node_name;
- hlist_for_each_entry_rcu(dev, head, name_hlist) - if (!strncmp(dev->name, name, IFNAMSIZ)) - return dev; - - return NULL; + node_name = netdev_name_node_lookup_rcu(net, name); + return node_name ? node_name->dev : NULL; } EXPORT_SYMBOL(dev_get_by_name_rcu);
@@@ -928,7 -901,7 +928,7 @@@ EXPORT_SYMBOL(dev_get_by_napi_id) * * The use of raw_seqcount_begin() and cond_resched() before * retrying is required as we want to give the writers a chance - * to complete when CONFIG_PREEMPT is not set. + * to complete when CONFIG_PREEMPTION is not set. */ int netdev_get_name(struct net *net, char *name, int ifindex) { @@@ -1168,8 -1141,8 +1168,8 @@@ int dev_alloc_name(struct net_device *d } EXPORT_SYMBOL(dev_alloc_name);
-int dev_get_valid_name(struct net *net, struct net_device *dev, - const char *name) +static int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { BUG_ON(!net);
@@@ -1185,6 -1158,7 +1185,6 @@@
return 0; } -EXPORT_SYMBOL(dev_get_valid_name);
/** * dev_change_name - change name of a device @@@ -1258,13 -1232,13 +1258,13 @@@ rollback netdev_adjacent_rename_links(dev, oldname);
write_lock_bh(&dev_base_lock); - hlist_del_rcu(&dev->name_hlist); + netdev_name_node_del(dev->name_node); write_unlock_bh(&dev_base_lock);
synchronize_rcu();
write_lock_bh(&dev_base_lock); - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); + netdev_name_node_add(net, dev->name_node); write_unlock_bh(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); @@@ -1314,8 -1288,8 +1314,8 @@@ int dev_set_alias(struct net_device *de }
mutex_lock(&ifalias_mutex); - rcu_swap_protected(dev->ifalias, new_alias, - mutex_is_locked(&ifalias_mutex)); + new_alias = rcu_replace_pointer(dev->ifalias, new_alias, + mutex_is_locked(&ifalias_mutex)); mutex_unlock(&ifalias_mutex);
if (new_alias) @@@ -1643,62 -1617,6 +1643,62 @@@ static int call_netdevice_notifier(stru return nb->notifier_call(nb, val, &info); }
+static int call_netdevice_register_notifiers(struct notifier_block *nb, + struct net_device *dev) +{ + int err; + + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + return err; + + if (!(dev->flags & IFF_UP)) + return 0; + + call_netdevice_notifier(nb, NETDEV_UP, dev); + return 0; +} + +static void call_netdevice_unregister_notifiers(struct notifier_block *nb, + struct net_device *dev) +{ + if (dev->flags & IFF_UP) { + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); + } + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); +} + +static int call_netdevice_register_net_notifiers(struct notifier_block *nb, + struct net *net) +{ + struct net_device *dev; + int err; + + for_each_netdev(net, dev) { + err = call_netdevice_register_notifiers(nb, dev); + if (err) + goto rollback; + } + return 0; + +rollback: + for_each_netdev_continue_reverse(net, dev) + call_netdevice_unregister_notifiers(nb, dev); + return err; +} + +static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb, + struct net *net) +{ + struct net_device *dev; + + for_each_netdev(net, dev) + call_netdevice_unregister_notifiers(nb, dev); +} + static int dev_boot_phase = 1;
/** @@@ -1717,6 -1635,8 +1717,6 @@@
int register_netdevice_notifier(struct notifier_block *nb) { - struct net_device *dev; - struct net_device *last; struct net *net; int err;
@@@ -1729,9 -1649,17 +1729,9 @@@ if (dev_boot_phase) goto unlock; for_each_net(net) { - for_each_netdev(net, dev) { - err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - call_netdevice_notifier(nb, NETDEV_UP, dev); - } + err = call_netdevice_register_net_notifiers(nb, net); + if (err) + goto rollback; }
unlock: @@@ -1740,9 -1668,22 +1740,9 @@@ return err;
rollback: - last = dev; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev == last) - goto outroll; - - if (dev->flags & IFF_UP) { - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, - dev); - call_netdevice_notifier(nb, NETDEV_DOWN, dev); - } - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); - } - } + for_each_net_continue_reverse(net) + call_netdevice_unregister_net_notifiers(nb, net);
-outroll: raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } @@@ -1792,80 -1733,6 +1792,80 @@@ unlock } EXPORT_SYMBOL(unregister_netdevice_notifier);
+/**
+ * register_netdevice_notifier_net - register a per-netns network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ *
+ * When registered, all registration and up events are replayed
+ * to the new notifier to allow it to have a race-free
+ * view of the network device list.
+ */
+
+int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
+{
+	int err;
+
+	rtnl_lock();
+	err = raw_notifier_chain_register(&net->netdev_chain, nb);
+	if (err)
+		goto unlock;
+	if (dev_boot_phase)
+		goto unlock;
+
+	err = call_netdevice_register_net_notifiers(nb, net);
+	if (err)
+		goto chain_unregister;
+
+unlock:
+	rtnl_unlock();
+	return err;
+
+chain_unregister:
+	raw_notifier_chain_unregister(&net->netdev_chain, nb);
+	goto unlock;
+}
+EXPORT_SYMBOL(register_netdevice_notifier_net);
+
+/**
+ * unregister_netdevice_notifier_net - unregister a per-netns
+ *                                     network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by
+ * register_netdevice_notifier_net(). The notifier is unlinked from the
+ * kernel structures and may then be reused. A negative errno code
+ * is returned on a failure.
+ *
+ * After unregistering, unregister and down device events are synthesized
+ * for all devices on the device list to the removed notifier to remove
+ * the need for special case cleanup code.
+ */
+
+int unregister_netdevice_notifier_net(struct net *net,
+				      struct notifier_block *nb)
+{
+	int err;
+
+	rtnl_lock();
+	err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+	if (err)
+		goto unlock;
+
+	call_netdevice_unregister_net_notifiers(nb, net);
+
+unlock:
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+
 /**
  * call_netdevice_notifiers_info - call all network notifier blocks
  * @val: value passed unmodified to notifier function
@@@ -1878,18 -1745,7 +1878,18 @@@
 static int call_netdevice_notifiers_info(unsigned long val,
					  struct netdev_notifier_info *info)
 {
+	struct net *net = dev_net(info->dev);
+	int ret;
+
 	ASSERT_RTNL();
+
+	/* Run per-netns notifier block chain first, then run the global one.
+	 * Hopefully, one day, the global one is going to be removed after
+	 * all notifier block registrants are converted to be per-netns.
+	 */
+	ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
+	if (ret & NOTIFY_STOP_MASK)
+		return ret;
 	return raw_notifier_call_chain(&netdev_chain, val, info);
 }
@@@ -2915,7 -2771,7 +2915,7 @@@ static struct dev_kfree_skb_cb *get_kfr void netif_schedule_queue(struct netdev_queue *txq) { rcu_read_lock(); - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { + if (!netif_xmit_stopped(txq)) { struct Qdisc *q = rcu_dereference(txq->qdisc);
__netif_schedule(q); @@@ -3083,9 -2939,12 +3083,9 @@@ int skb_checksum_help(struct sk_buff *s offset += skb->csum_offset; BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
- if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__sum16))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } + ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); + if (ret) + goto out;
*(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: @@@ -3120,11 -2979,12 +3120,11 @@@ int skb_crc32c_csum_help(struct sk_buf ret = -EINVAL; goto out; } - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__le32))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } + + ret = skb_ensure_writable(skb, offset + sizeof(__le32)); + if (ret) + goto out; + crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, skb->len - start, ~(__u32)0, crc32c_csum_stub)); @@@ -3607,7 -3467,7 +3607,7 @@@ static inline int __dev_xmit_skb(struc qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && + if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && qdisc_run_begin(q)) { if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { @@@ -5586,7 -5446,7 +5586,7 @@@ static struct list_head *gro_list_prepa diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb); if (skb_vlan_tag_present(p)) - diffs |= p->vlan_tci ^ skb->vlan_tci; + diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb); diffs |= skb_metadata_dst_cmp(p, skb); diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) @@@ -5611,7 -5471,8 +5611,7 @@@ static void skb_gro_reset_offset(struc NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0;
- if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && + if (!skb_headlen(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, @@@ -5802,26 -5663,6 +5802,26 @@@ struct packet_offload *gro_find_complet } EXPORT_SYMBOL(gro_find_complete_by_type);
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + static void napi_skb_free_stolen_head(struct sk_buff *skb) { skb_dst_drop(skb); @@@ -5829,13 -5670,12 +5829,13 @@@ kmem_cache_free(skbuff_head_cache, skb); }
-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) +static gro_result_t napi_skb_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) - ret = GRO_DROP; + gro_normal_one(napi, skb); break;
case GRO_DROP: @@@ -5867,7 -5707,7 +5867,7 @@@ gro_result_t napi_gro_receive(struct na
skb_gro_reset_offset(skb);
- ret = napi_skb_finish(dev_gro_receive(napi, skb), skb); + ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); trace_napi_gro_receive_exit(ret);
return ret; @@@ -5913,6 -5753,26 +5913,6 @@@ struct sk_buff *napi_get_frags(struct n } EXPORT_SYMBOL(napi_get_frags);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ -static void gro_normal_list(struct napi_struct *napi) -{ - if (!napi->rx_count) - return; - netif_receive_skb_list_internal(&napi->rx_list); - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; -} - -/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, - * pass the whole batch up to the stack. - */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) -{ - list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) - gro_normal_list(napi); -} - static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) @@@ -6629,9 -6489,6 +6629,9 @@@ struct netdev_adjacent /* upper master flag, there can only be one master device per list */ bool master;
+ /* lookup ignore flag */ + bool ignore; + /* counter for the number of times this device was added to us */ u16 ref_nr;
@@@ -6654,7 -6511,7 +6654,7 @@@ static struct netdev_adjacent *__netdev return NULL; }
-static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) { struct net_device *dev = data;
@@@ -6675,7 -6532,7 +6675,7 @@@ bool netdev_has_upper_dev(struct net_de { ASSERT_RTNL();
- return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); @@@ -6693,7 -6550,7 +6693,7 @@@ bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { - return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@@ -6737,22 -6594,6 +6737,22 @@@ struct net_device *netdev_master_upper_ } EXPORT_SYMBOL(netdev_master_upper_dev_get);
+static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev) +{ + struct netdev_adjacent *upper; + + ASSERT_RTNL(); + + if (list_empty(&dev->adj_list.upper)) + return NULL; + + upper = list_first_entry(&dev->adj_list.upper, + struct netdev_adjacent, list); + if (likely(upper->master) && !upper->ignore) + return upper->dev; + return NULL; +} + /** * netdev_has_any_lower_dev - Check if device is linked to some device * @dev: device @@@ -6803,23 -6644,6 +6803,23 @@@ struct net_device *netdev_upper_get_nex } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+static struct net_device *__netdev_next_upper_dev(struct net_device *dev, + struct list_head **iter, + bool *ignore) +{ + struct netdev_adjacent *upper; + + upper = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + *ignore = upper->ignore; + + return upper->dev; +} + static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, struct list_head **iter) { @@@ -6837,111 -6661,34 +6837,111 @@@ return upper->dev; }
+static int __netdev_walk_all_upper_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + bool ignore; + + now = dev; + iter = &dev->adj_list.upper; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + udev = __netdev_next_upper_dev(now, &iter, &ignore); + if (!udev) + break; + if (ignore) + continue; + + next = udev; + niter = &udev->adj_list.upper; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return 0; +} + int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, void *data), void *data) { - struct net_device *udev; - struct list_head *iter; - int ret; + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0;
- for (iter = &dev->adj_list.upper, - udev = netdev_next_upper_dev_rcu(dev, &iter); - udev; - udev = netdev_next_upper_dev_rcu(dev, &iter)) { - /* first is the upper device itself */ - ret = fn(udev, data); - if (ret) - return ret; + now = dev; + iter = &dev->adj_list.upper;
- /* then look at all of its upper devices */ - ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); - if (ret) - return ret; + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + udev = netdev_next_upper_dev_rcu(now, &iter); + if (!udev) + break; + + next = udev; + niter = &udev->adj_list.upper; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; }
return 0; } EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
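The rewritten walkers above and below trade recursion for an explicit stack capped at MAX_NEST_DEV levels, keeping kernel stack usage flat no matter how deeply devices are nested. A stripped-down sketch of the same idea over a generic tree (not a line-for-line copy of the kernel walker; the node layout and helpers are invented):

#define MAX_NEST	8

struct node {
	struct node **children;		/* NULL-terminated array */
};

static int walk_all(struct node *root, int (*fn)(struct node *))
{
	struct node *stack[MAX_NEST + 1];
	int idx_stack[MAX_NEST + 1];
	struct node *now = root, *next;
	int idx = 0, cur = 0, ret;

	while (1) {
		next = now->children ? now->children[idx] : NULL;
		if (next) {
			ret = fn(next);		/* visit on first entry */
			if (ret)
				return ret;
			if (cur >= MAX_NEST)	/* enforce the depth bound */
				return -E2BIG;
			stack[cur] = now;	/* remember the resume point */
			idx_stack[cur++] = idx + 1;
			now = next;
			idx = 0;
			continue;
		}
		if (!cur)			/* back at the root: done */
			return 0;
		now = stack[--cur];		/* pop, continue with siblings */
		idx = idx_stack[cur];
	}
}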
+static bool __netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) +{ + ASSERT_RTNL(); + + return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, + upper_dev); +} + /** * netdev_lower_get_next_private - Get the next ->private from the * lower neighbour list @@@ -7038,119 -6785,34 +7038,119 @@@ static struct net_device *netdev_next_l return lower->dev; }
+static struct net_device *__netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter, + bool *ignore) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + *ignore = lower->ignore; + + return lower->dev; +} + int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, void *data), void *data) { - struct net_device *ldev; - struct list_head *iter; - int ret; + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0;
- for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev(dev, &iter); - ldev; - ldev = netdev_next_lower_dev(dev, &iter)) { - /* first is the lower device itself */ - ret = fn(ldev, data); - if (ret) - return ret; + now = dev; + iter = &dev->adj_list.lower;
- /* then look at all of its lower devices */ - ret = netdev_walk_all_lower_dev(ldev, fn, data); - if (ret) - return ret; + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = netdev_next_lower_dev(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; }
return 0; } EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
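All of these walkers lean on one cursor idiom: the caller owns a struct list_head * that the next-dev helper advances, returning NULL once it wraps back around to the list head, so the iteration state can be parked in iter_stack[] and resumed later. Reduced to a generic list (struct item and next_item() are illustrative names):

    struct item {
            struct list_head list;
            /* payload */
    };

    static struct item *next_item(struct list_head *head,
                                  struct list_head **iter)
    {
            struct item *it = list_entry((*iter)->next, struct item, list);

            if (&it->list == head)          /* wrapped around: done */
                    return NULL;

            *iter = &it->list;              /* advance the caller's cursor */
            return it;
    }

Because the position lives entirely in the caller's pointer, pushing and popping that pointer is enough to suspend and resume a partially finished level of the traversal.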
+static int __netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + bool ignore; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = __netdev_next_lower_dev(now, &iter, &ignore); + if (!ldev) + break; + if (ignore) + continue; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return 0; +} + static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter) { @@@ -7165,99 -6827,28 +7165,99 @@@ return lower->dev; }
-int netdev_walk_all_lower_dev_rcu(struct net_device *dev, - int (*fn)(struct net_device *dev, - void *data), - void *data) +static u8 __netdev_upper_depth(struct net_device *dev) +{ + struct net_device *udev; + struct list_head *iter; + u8 max_depth = 0; + bool ignore; + + for (iter = &dev->adj_list.upper, + udev = __netdev_next_upper_dev(dev, &iter, &ignore); + udev; + udev = __netdev_next_upper_dev(dev, &iter, &ignore)) { + if (ignore) + continue; + if (max_depth < udev->upper_level) + max_depth = udev->upper_level; + } + + return max_depth; +} + +static u8 __netdev_lower_depth(struct net_device *dev) { struct net_device *ldev; struct list_head *iter; - int ret; + u8 max_depth = 0; + bool ignore;
for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev = __netdev_next_lower_dev(dev, &iter, &ignore); ldev; - ldev = netdev_next_lower_dev_rcu(dev, &iter)) { - /* first is the lower device itself */ - ret = fn(ldev, data); - if (ret) - return ret; + ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) { + if (ignore) + continue; + if (max_depth < ldev->lower_level) + max_depth = ldev->lower_level; + }
- /* then look at all of its lower devices */ - ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); - if (ret) - return ret; + return max_depth; +} + +static int __netdev_update_upper_level(struct net_device *dev, void *data) +{ + dev->upper_level = __netdev_upper_depth(dev) + 1; + return 0; +} + +static int __netdev_update_lower_level(struct net_device *dev, void *data) +{ + dev->lower_level = __netdev_lower_depth(dev) + 1; + return 0; +} + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; }
return 0; @@@ -7361,7 -6952,6 +7361,7 @@@ static int __netdev_adjacent_dev_insert adj->master = master; adj->ref_nr = 1; adj->private = private; + adj->ignore = false; dev_hold(adj_dev);
pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", @@@ -7512,17 -7102,14 +7512,17 @@@ static int __netdev_upper_dev_link(stru return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */ - if (netdev_has_upper_dev(upper_dev, dev)) + if (__netdev_has_upper_dev(upper_dev, dev)) return -EBUSY;
+ if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) + return -EMLINK; + if (!master) { - if (netdev_has_upper_dev(dev, upper_dev)) + if (__netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; } else { - master_dev = netdev_master_upper_dev_get(dev); + master_dev = __netdev_master_upper_dev_get(dev); if (master_dev) return master_dev == upper_dev ? -EEXIST : -EBUSY; } @@@ -7544,13 -7131,6 +7544,13 @@@ if (ret) goto rollback;
+ __netdev_update_upper_level(dev, NULL); + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + + __netdev_update_lower_level(upper_dev, NULL); + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, + NULL); + return 0;
rollback: @@@ -7633,96 -7213,9 +7633,96 @@@ void netdev_upper_dev_unlink(struct net
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); + + __netdev_update_upper_level(dev, NULL); + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + + __netdev_update_lower_level(upper_dev, NULL); + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, + NULL); } EXPORT_SYMBOL(netdev_upper_dev_unlink);
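Both the link and unlink paths now finish by recomputing the nesting counters: a device's upper_level is one more than the deepest upper_level among its uppers, lower_level is maintained symmetrically, and the __netdev_walk_all_*_dev() calls propagate the update through the whole affected subgraph. A condensed restatement of __netdev_upper_depth() above, using the existing netdev_for_each_upper_dev_rcu() iterator and ignoring the adj->ignore handling:

    /* level(dev) = 1 + deepest level among its uppers (1 if standalone) */
    static u8 recompute_upper_level(struct net_device *dev)
    {
            struct net_device *udev;
            struct list_head *iter;
            u8 max_depth = 0;

            netdev_for_each_upper_dev_rcu(dev, udev, iter)
                    if (max_depth < udev->upper_level)
                            max_depth = udev->upper_level;

            return max_depth + 1;
    }

With the counters kept eagerly up to date, the old recursive dev_get_nest_level() (removed further down) has no users left, and the pre-link depth check reduces to the single comparison against MAX_NEST_DEV seen above.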
+static void __netdev_adjacent_dev_set(struct net_device *upper_dev, + struct net_device *lower_dev, + bool val) +{ + struct netdev_adjacent *adj; + + adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower); + if (adj) + adj->ignore = val; + + adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper); + if (adj) + adj->ignore = val; +} + +static void netdev_adjacent_dev_disable(struct net_device *upper_dev, + struct net_device *lower_dev) +{ + __netdev_adjacent_dev_set(upper_dev, lower_dev, true); +} + +static void netdev_adjacent_dev_enable(struct net_device *upper_dev, + struct net_device *lower_dev) +{ + __netdev_adjacent_dev_set(upper_dev, lower_dev, false); +} + +int netdev_adjacent_change_prepare(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + int err; + + if (!new_dev) + return 0; + + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_disable(dev, old_dev); + + err = netdev_upper_dev_link(new_dev, dev, extack); + if (err) { + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_enable(dev, old_dev); + return err; + } + + return 0; +} +EXPORT_SYMBOL(netdev_adjacent_change_prepare); + +void netdev_adjacent_change_commit(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev) +{ + if (!new_dev || !old_dev) + return; + + if (new_dev == old_dev) + return; + + netdev_adjacent_dev_enable(dev, old_dev); + netdev_upper_dev_unlink(old_dev, dev); +} +EXPORT_SYMBOL(netdev_adjacent_change_commit); + +void netdev_adjacent_change_abort(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev) +{ + if (!new_dev) + return; + + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_enable(dev, old_dev); + + netdev_upper_dev_unlink(new_dev, dev); +} +EXPORT_SYMBOL(netdev_adjacent_change_abort); + /** * netdev_bonding_info_change - Dispatch event about slave change * @dev: device @@@ -7836,6 -7329,25 +7836,6 @@@ void *netdev_lower_dev_get_private(stru EXPORT_SYMBOL(netdev_lower_dev_get_private);
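The netdev_adjacent_change_{prepare,commit,abort}() trio gives callers that swap one linked device for another (bonding and team active-port changes are the intended users) a transactional shape: prepare marks the old adjacency ignored and links the new device, commit finally unlinks the old one, abort re-enables it. A hedged caller sketch, with do_switch() standing in for whatever driver-specific step sits between the two phases:

    err = netdev_adjacent_change_prepare(old_dev, new_dev, dev, extack);
    if (err)
            return err;     /* the old link was re-enabled on failure */

    err = do_switch(dev, new_dev);          /* hypothetical driver work */
    if (err) {
            netdev_adjacent_change_abort(old_dev, new_dev, dev);
            return err;
    }

    netdev_adjacent_change_commit(old_dev, new_dev, dev);

Marking the outgoing adjacency ignored during prepare is what stops the loop and nesting checks from seeing the old and new link simultaneously.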
-int dev_get_nest_level(struct net_device *dev) -{ - struct net_device *lower = NULL; - struct list_head *iter; - int max_nest = -1; - int nest; - - ASSERT_RTNL(); - - netdev_for_each_lower_dev(dev, lower, iter) { - nest = dev_get_nest_level(lower); - if (max_nest < nest) - max_nest = nest; - } - - return max_nest + 1; -} -EXPORT_SYMBOL(dev_get_nest_level); - /** * netdev_lower_change - Dispatch event about lower device state change * @lower_dev: device @@@ -8188,8 -7700,7 +8188,8 @@@ int __dev_set_mtu(struct net_device *de if (ops->ndo_change_mtu) return ops->ndo_change_mtu(dev, new_mtu);
- dev->mtu = new_mtu; + /* Pairs with all the lockless reads of dev->mtu in the stack */ + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(__dev_set_mtu); @@@ -8643,8 -8154,7 +8643,8 @@@ int dev_change_xdp_fd(struct net_devic return -EINVAL; }
- if (prog->aux->id == prog_id) { + /* prog->aux->id may be 0 for orphaned device-bound progs */ + if (prog->aux->id && prog->aux->id == prog_id) { bpf_prog_put(prog); return 0; } @@@ -8754,9 -8264,6 +8754,9 @@@ static void rollback_registered_many(st dev_uc_flush(dev); dev_mc_flush(dev);
+ netdev_name_node_alt_flush(dev); + netdev_name_node_free(dev->name_node); + if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev);
@@@ -9112,7 -8619,7 +9112,7 @@@ static void netdev_init_one_queue(struc { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; @@@ -9159,43 -8666,6 +9159,43 @@@ void netif_tx_stop_all_queues(struct ne } EXPORT_SYMBOL(netif_tx_stop_all_queues);
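The hunks that follow replace the static per-device-type lockdep class arrays with dynamically registered per-netdev keys: alloc_netdev_mqs() registers a set of lock_class_keys for each device, free_netdev() unregisters them, and netdev_update_lockdep_key() re-registers the xmit and addr_list keys when a device's position in a stacking hierarchy changes. The underlying pattern, on a hypothetical structure:

    struct obj {
            spinlock_t lock;
            struct lock_class_key key;  /* one lockdep class per instance */
    };

    static void obj_init(struct obj *o)
    {
            lockdep_register_key(&o->key);
            spin_lock_init(&o->lock);
            lockdep_set_class(&o->lock, &o->key);
    }

    static void obj_destroy(struct obj *o)
    {
            /* the key must stay registered for the lock's full lifetime */
            lockdep_unregister_key(&o->key);
    }

Per-instance keys mean nested devices no longer need manually maintained subclass numbering for lockdep to tell their locks apart.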
+static void netdev_register_lockdep_key(struct net_device *dev) +{ + lockdep_register_key(&dev->qdisc_tx_busylock_key); + lockdep_register_key(&dev->qdisc_running_key); + lockdep_register_key(&dev->qdisc_xmit_lock_key); + lockdep_register_key(&dev->addr_list_lock_key); +} + +static void netdev_unregister_lockdep_key(struct net_device *dev) +{ + lockdep_unregister_key(&dev->qdisc_tx_busylock_key); + lockdep_unregister_key(&dev->qdisc_running_key); + lockdep_unregister_key(&dev->qdisc_xmit_lock_key); + lockdep_unregister_key(&dev->addr_list_lock_key); +} + +void netdev_update_lockdep_key(struct net_device *dev) +{ + struct netdev_queue *queue; + int i; + + lockdep_unregister_key(&dev->qdisc_xmit_lock_key); + lockdep_unregister_key(&dev->addr_list_lock_key); + + lockdep_register_key(&dev->qdisc_xmit_lock_key); + lockdep_register_key(&dev->addr_list_lock_key); + + lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); + for (i = 0; i < dev->num_tx_queues; i++) { + queue = netdev_get_tx_queue(dev, i); + + lockdep_set_class(&queue->_xmit_lock, + &dev->qdisc_xmit_lock_key); + } +} +EXPORT_SYMBOL(netdev_update_lockdep_key); + /** * register_netdevice - register a network device * @dev: device to register @@@ -9230,24 -8700,19 +9230,24 @@@ int register_netdevice(struct net_devic BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock); - netdev_set_addr_lockdep_class(dev); + lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out;
+ ret = -ENOMEM; + dev->name_node = netdev_name_node_head_alloc(dev); + if (!dev->name_node) + goto out; + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; - goto out; + goto err_free_name; } }
@@@ -9366,8 -8831,6 +9366,8 @@@ err_uninit dev->netdev_ops->ndo_uninit(dev); if (dev->priv_destructor) dev->priv_destructor(dev); +err_free_name: + netdev_name_node_free(dev->name_node); goto out; } EXPORT_SYMBOL(register_netdevice); @@@ -9747,12 -9210,8 +9747,12 @@@ struct net_device *alloc_netdev_mqs(in
dev_net_set(dev, &init_net);
+ netdev_register_lockdep_key(dev); + dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; + dev->upper_level = 1; + dev->lower_level = 1;
INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); @@@ -9833,8 -9292,6 +9833,8 @@@ void free_netdev(struct net_device *dev free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL;
+ netdev_unregister_lockdep_key(dev); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); @@@ -10003,7 -9460,7 +10003,7 @@@ int dev_change_net_namespace(struct net call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier();
- new_nsid = peernet2id_alloc(dev_net(dev), net); + new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) new_ifindex = dev_new_index(net); @@@ -10165,7 -9622,7 +10165,7 @@@ static struct hlist_head * __net_init n static int __net_init netdev_init(struct net *net) { BUILD_BUG_ON(GRO_HASH_BUCKETS > - 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask)); + 8 * sizeof_member(struct napi_struct, gro_bitmask));
if (net != &init_net) INIT_LIST_HEAD(&net->dev_base_head); @@@ -10178,8 -9635,6 +10178,8 @@@ if (net->dev_index_head == NULL) goto err_idx;
+ RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); + return 0;
err_idx: diff --combined net/core/filter.c index f1e703eed3d2,284f60695ba9..07ca7e611e88 --- a/net/core/filter.c +++ b/net/core/filter.c @@@ -274,7 -274,7 +274,7 @@@ static u32 convert_skb_access(int skb_f
switch (skb_field) { case SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + BUILD_BUG_ON(sizeof_member(struct sk_buff, mark) != 4);
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, offsetof(struct sk_buff, mark)); @@@ -289,14 -289,14 +289,14 @@@ break;
case SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + BUILD_BUG_ON(sizeof_member(struct sk_buff, queue_mapping) != 2);
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, offsetof(struct sk_buff, queue_mapping)); break;
case SKF_AD_VLAN_TAG: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(sizeof_member(struct sk_buff, vlan_tci) != 2);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, @@@ -322,7 -322,7 +322,7 @@@ static bool convert_bpf_extensions(stru
switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + BUILD_BUG_ON(sizeof_member(struct sk_buff, protocol) != 2);
/* A = *(u16 *) (CTX + offsetof(protocol)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, @@@ -338,8 -338,8 +338,8 @@@
case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + BUILD_BUG_ON(sizeof_member(struct net_device, ifindex) != 4); + BUILD_BUG_ON(sizeof_member(struct net_device, type) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), BPF_REG_TMP, BPF_REG_CTX, @@@ -361,7 -361,7 +361,7 @@@ break;
case SKF_AD_OFF + SKF_AD_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + BUILD_BUG_ON(sizeof_member(struct sk_buff, hash) != 4);
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, offsetof(struct sk_buff, hash)); @@@ -385,7 -385,7 +385,7 @@@ break;
case SKF_AD_OFF + SKF_AD_VLAN_TPID: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + BUILD_BUG_ON(sizeof_member(struct sk_buff, vlan_proto) != 2);
/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, @@@ -2245,7 -2245,7 +2245,7 @@@ BPF_CALL_4(bpf_msg_pull_data, struct sk * account for the headroom. */ bytes_sg_total = start - offset + bytes; - if (!msg->sg.copy[i] && bytes_sg_total <= len) + if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len) goto out;
/* At this point we need to linearize multiple scatterlist @@@ -2299,7 -2299,7 +2299,7 @@@ WARN_ON_ONCE(last_sge == first_sge); shift = last_sge > first_sge ? last_sge - first_sge - 1 : - MAX_SKB_FRAGS - first_sge + last_sge - 1; + NR_MSG_FRAG_IDS - first_sge + last_sge - 1; if (!shift) goto out;
@@@ -2308,8 -2308,8 +2308,8 @@@ do { u32 move_from;
- if (i + shift >= MAX_MSG_FRAGS) - move_from = i + shift - MAX_MSG_FRAGS; + if (i + shift >= NR_MSG_FRAG_IDS) + move_from = i + shift - NR_MSG_FRAG_IDS; else move_from = i + shift; if (move_from == msg->sg.end) @@@ -2323,7 -2323,7 +2323,7 @@@ } while (1);
msg->sg.end = msg->sg.end - shift > msg->sg.end ? - msg->sg.end - shift + MAX_MSG_FRAGS : + msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; @@@ -2450,7 -2450,7 +2450,7 @@@ BPF_CALL_4(bpf_msg_push_data, struct sk /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; - msg->sg.copy[new] = false; + __clear_bit(new, &msg->sg.copy); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); @@@ -3798,7 -3798,7 +3798,7 @@@ BPF_CALL_5(bpf_skb_event_output, struc
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(skb_size > skb->len)) + if (unlikely(!skb || skb_size > skb->len)) return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, @@@ -3816,19 -3816,6 +3816,19 @@@ static const struct bpf_func_proto bpf_ .arg5_type = ARG_CONST_SIZE_OR_ZERO, };
+static int bpf_skb_output_btf_ids[5]; +const struct bpf_func_proto bpf_skb_output_proto = { + .func = bpf_skb_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_skb_output_btf_ids, +}; + static unsigned short bpf_tunnel_key_af(u64 flags) { return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; @@@ -4102,7 -4089,7 +4102,7 @@@ BPF_CALL_1(bpf_skb_cgroup_id, const str return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return cgrp->kn->id.id; + return cgroup_id(cgrp); }
static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { @@@ -4127,7 -4114,7 +4127,7 @@@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id, if (!ancestor) return 0;
- return ancestor->kn->id.id; + return cgroup_id(ancestor); }
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { @@@ -4265,14 -4252,12 +4265,14 @@@ BPF_CALL_5(bpf_setsockopt, struct bpf_s case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + WRITE_ONCE(sk->sk_sndbuf, + max_t(int, val * 2, SOCK_MIN_SNDBUF)); break; case SO_MAX_PACING_RATE: /* 32bit version */ if (val != ~0U) @@@ -4289,7 -4274,7 +4289,7 @@@ case SO_RCVLOWAT: if (val < 0) val = INT_MAX; - sk->sk_rcvlowat = val ? : 1; + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); break; case SO_MARK: if (sk->sk_mark != val) { @@@ -5589,8 -5574,8 +5589,8 @@@ u32 bpf_tcp_sock_convert_ctx_access(enu
#define BPF_TCP_SOCK_GET_COMMON(FIELD) \ do { \ - BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ - FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + BUILD_BUG_ON(sizeof_member(struct tcp_sock, FIELD) > \ + sizeof_member(struct bpf_tcp_sock, FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ si->dst_reg, si->src_reg, \ offsetof(struct tcp_sock, FIELD)); \ @@@ -5598,9 -5583,9 +5598,9 @@@
#define BPF_INET_SOCK_GET_COMMON(FIELD) \ do { \ - BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \ + BUILD_BUG_ON(sizeof_member(struct inet_connection_sock, \ FIELD) > \ - FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + sizeof_member(struct bpf_tcp_sock, FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct inet_connection_sock, \ FIELD), \ @@@ -5615,7 -5600,7 +5615,7 @@@
switch (si->off) { case offsetof(struct bpf_tcp_sock, rtt_min): - BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + BUILD_BUG_ON(sizeof_member(struct tcp_sock, rtt_min) != sizeof(struct minmax)); BUILD_BUG_ON(sizeof(struct minmax) < sizeof(struct minmax_sample)); @@@ -5780,8 -5765,8 +5780,8 @@@ u32 bpf_xdp_sock_convert_ctx_access(enu
#define BPF_XDP_SOCK_GET(FIELD) \ do { \ - BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \ - FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \ + BUILD_BUG_ON(sizeof_member(struct xdp_sock, FIELD) > \ + sizeof_member(struct bpf_xdp_sock, FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\ si->dst_reg, si->src_reg, \ offsetof(struct xdp_sock, FIELD)); \ @@@ -7344,7 -7329,7 +7344,7 @@@ static u32 bpf_convert_ctx_access(enum
case offsetof(struct __sk_buff, cb[0]) ... offsetofend(struct __sk_buff, cb[4]) - 1: - BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + BUILD_BUG_ON(sizeof_member(struct qdisc_skb_cb, data) < 20); BUILD_BUG_ON((offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data)) % sizeof(__u64)); @@@ -7363,7 -7348,7 +7363,7 @@@ break;
case offsetof(struct __sk_buff, tc_classid): - BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2); + BUILD_BUG_ON(sizeof_member(struct qdisc_skb_cb, tc_classid) != 2);
off = si->off; off -= offsetof(struct __sk_buff, tc_classid); @@@ -7434,7 -7419,7 +7434,7 @@@ #endif break; case offsetof(struct __sk_buff, family): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, @@@ -7445,7 -7430,7 +7445,7 @@@ 2, target_size)); break; case offsetof(struct __sk_buff, remote_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, @@@ -7456,7 -7441,7 +7456,7 @@@ 4, target_size)); break; case offsetof(struct __sk_buff, local_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), @@@ -7470,7 -7455,7 +7470,7 @@@ case offsetof(struct __sk_buff, remote_ip6[0]) ... offsetof(struct __sk_buff, remote_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off; @@@ -7490,7 -7475,7 +7490,7 @@@ case offsetof(struct __sk_buff, local_ip6[0]) ... offsetof(struct __sk_buff, local_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off; @@@ -7509,7 -7494,7 +7509,7 @@@ break;
case offsetof(struct __sk_buff, remote_port): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, @@@ -7524,7 -7509,7 +7524,7 @@@ break;
case offsetof(struct __sk_buff, local_port): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, @@@ -7535,7 -7520,7 +7535,7 @@@ break;
case offsetof(struct __sk_buff, tstamp): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); + BUILD_BUG_ON(sizeof_member(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_DW, @@@ -7573,7 -7558,7 +7573,7 @@@ target_size)); break; case offsetof(struct __sk_buff, wire_len): - BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); + BUILD_BUG_ON(sizeof_member(struct qdisc_skb_cb, pkt_len) != 4);
off = si->off; off -= offsetof(struct __sk_buff, wire_len); @@@ -7603,7 -7588,7 +7603,7 @@@ u32 bpf_sock_convert_ctx_access(enum bp
switch (si->off) { case offsetof(struct bpf_sock, bound_dev_if): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4); + BUILD_BUG_ON(sizeof_member(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, @@@ -7614,7 -7599,7 +7614,7 @@@ break;
case offsetof(struct bpf_sock, mark): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4); + BUILD_BUG_ON(sizeof_member(struct sock, sk_mark) != 4);
if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, @@@ -7625,7 -7610,7 +7625,7 @@@ break;
case offsetof(struct bpf_sock, priority): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4); + BUILD_BUG_ON(sizeof_member(struct sock, sk_priority) != 4);
if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, @@@ -7641,7 -7626,7 +7641,7 @@@ si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_family, - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_family), target_size)); break; @@@ -7668,7 -7653,7 +7668,7 @@@ *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_rcv_saddr, - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_rcv_saddr), target_size)); break; @@@ -7677,7 -7662,7 +7677,7 @@@ *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_daddr, - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_daddr), target_size)); break; @@@ -7691,7 -7676,7 +7691,7 @@@ bpf_target_off( struct sock_common, skc_v6_rcv_saddr.s6_addr32[0], - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]), target_size) + off); #else @@@ -7708,7 -7693,7 +7708,7 @@@ BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_v6_daddr.s6_addr32[0], - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_v6_daddr.s6_addr32[0]), target_size) + off); #else @@@ -7722,7 -7707,7 +7722,7 @@@ BPF_FIELD_SIZEOF(struct sock_common, skc_num), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_num, - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_num), target_size)); break; @@@ -7732,7 -7717,7 +7732,7 @@@ BPF_FIELD_SIZEOF(struct sock_common, skc_dport), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_dport, - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_dport), target_size)); break; @@@ -7742,7 -7727,7 +7742,7 @@@ BPF_FIELD_SIZEOF(struct sock_common, skc_state), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_state, - FIELD_SIZEOF(struct sock_common, + sizeof_member(struct sock_common, skc_state), target_size)); break; @@@ -7837,7 -7822,7 +7837,7 @@@ static u32 xdp_convert_ctx_access(enum si->src_reg, offsetof(S, F)); \ *insn++ = BPF_LDX_MEM( \ SIZE, si->dst_reg, si->dst_reg, \ - bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ + bpf_target_off(NS, NF, sizeof_member(NS, NF), \ target_size) \ + OFF); \ } while (0) @@@ -7868,7 -7853,7 +7868,7 @@@ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ - bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ + bpf_target_off(NS, NF, sizeof_member(NS, NF), \ target_size) \ + OFF); \ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ @@@ -7930,8 -7915,8 +7930,8 @@@ static u32 sock_addr_convert_ctx_access */ BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) != offsetof(struct sockaddr_in6, sin6_port)); - BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) != - FIELD_SIZEOF(struct sockaddr_in6, sin6_port)); + BUILD_BUG_ON(sizeof_member(struct sockaddr_in, sin_port) != + sizeof_member(struct sockaddr_in6, sin6_port)); SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, sin6_port, tmp_reg); @@@ -7997,8 -7982,8 +7997,8 @@@ static u32 sock_ops_convert_ctx_access( /* Helper macro for adding read access to tcp_sock or sock fields. 
*/ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + BUILD_BUG_ON(sizeof_member(OBJ, OBJ_FIELD) > \ + sizeof_member(struct bpf_sock_ops, BPF_FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ @@@ -8031,8 -8016,8 +8031,8 @@@ #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ int reg = BPF_REG_9; \ - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + BUILD_BUG_ON(sizeof_member(OBJ, OBJ_FIELD) > \ + sizeof_member(struct bpf_sock_ops, BPF_FIELD)); \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ if (si->dst_reg == reg || si->src_reg == reg) \ @@@ -8073,12 -8058,12 +8073,12 @@@ switch (si->off) { case offsetof(struct bpf_sock_ops, op) ... offsetof(struct bpf_sock_ops, replylong[3]): - BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) != - FIELD_SIZEOF(struct bpf_sock_ops_kern, op)); - BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) != - FIELD_SIZEOF(struct bpf_sock_ops_kern, reply)); - BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) != - FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong)); + BUILD_BUG_ON(sizeof_member(struct bpf_sock_ops, op) != + sizeof_member(struct bpf_sock_ops_kern, op)); + BUILD_BUG_ON(sizeof_member(struct bpf_sock_ops, reply) != + sizeof_member(struct bpf_sock_ops_kern, reply)); + BUILD_BUG_ON(sizeof_member(struct bpf_sock_ops, replylong) != + sizeof_member(struct bpf_sock_ops_kern, replylong)); off = si->off; off -= offsetof(struct bpf_sock_ops, op); off += offsetof(struct bpf_sock_ops_kern, op); @@@ -8091,7 -8076,7 +8091,7 @@@ break;
case offsetof(struct bpf_sock_ops, family): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), @@@ -8102,7 -8087,7 +8102,7 @@@ break;
case offsetof(struct bpf_sock_ops, remote_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), @@@ -8113,7 -8098,7 +8113,7 @@@ break;
case offsetof(struct bpf_sock_ops, local_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( @@@ -8128,7 -8113,7 +8128,7 @@@ case offsetof(struct bpf_sock_ops, remote_ip6[0]) ... offsetof(struct bpf_sock_ops, remote_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off; @@@ -8149,7 -8134,7 +8149,7 @@@ case offsetof(struct bpf_sock_ops, local_ip6[0]) ... offsetof(struct bpf_sock_ops, local_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off; @@@ -8168,7 -8153,7 +8168,7 @@@ break;
case offsetof(struct bpf_sock_ops, remote_port): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), @@@ -8182,7 -8167,7 +8182,7 @@@ break;
case offsetof(struct bpf_sock_ops, local_port): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), @@@ -8202,7 -8187,7 +8202,7 @@@ break;
case offsetof(struct bpf_sock_ops, state): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_state) != 1);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), @@@ -8213,7 -8198,7 +8213,7 @@@ break;
case offsetof(struct bpf_sock_ops, rtt_min): - BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + BUILD_BUG_ON(sizeof_member(struct tcp_sock, rtt_min) != sizeof(struct minmax)); BUILD_BUG_ON(sizeof(struct minmax) < sizeof(struct minmax_sample)); @@@ -8224,7 -8209,7 +8224,7 @@@ offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct tcp_sock, rtt_min) + - FIELD_SIZEOF(struct minmax_sample, t)); + sizeof_member(struct minmax_sample, t)); break;
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): @@@ -8366,7 -8351,7 +8366,7 @@@ static u32 sk_msg_convert_ctx_access(en offsetof(struct sk_msg, data_end)); break; case offsetof(struct sk_msg_md, family): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), @@@ -8377,7 -8362,7 +8377,7 @@@ break;
case offsetof(struct sk_msg_md, remote_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), @@@ -8388,7 -8373,7 +8388,7 @@@ break;
case offsetof(struct sk_msg_md, local_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( @@@ -8403,7 -8388,7 +8403,7 @@@ case offsetof(struct sk_msg_md, remote_ip6[0]) ... offsetof(struct sk_msg_md, remote_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off; @@@ -8424,7 -8409,7 +8424,7 @@@ case offsetof(struct sk_msg_md, local_ip6[0]) ... offsetof(struct sk_msg_md, local_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off; @@@ -8443,7 -8428,7 +8443,7 @@@ break;
case offsetof(struct sk_msg_md, remote_port): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), @@@ -8457,7 -8442,7 +8457,7 @@@ break;
case offsetof(struct sk_msg_md, local_port): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + BUILD_BUG_ON(sizeof_member(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), @@@ -8684,6 -8669,16 +8684,6 @@@ out }
#ifdef CONFIG_INET -struct sk_reuseport_kern { - struct sk_buff *skb; - struct sock *sk; - struct sock *selected_sk; - void *data_end; - u32 hash; - u32 reuseport_id; - bool bind_inany; -}; - static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern, struct sock_reuseport *reuse, struct sock *sk, struct sk_buff *skb, @@@ -8847,7 -8842,7 +8847,7 @@@ sk_reuseport_is_valid_access(int off, i
/* Fields that allow narrowing */ case bpf_ctx_range(struct sk_reuseport_md, eth_protocol): - if (size < FIELD_SIZEOF(struct sk_buff, protocol)) + if (size < sizeof_member(struct sk_buff, protocol)) return false; /* fall through */ case bpf_ctx_range(struct sk_reuseport_md, ip_protocol): @@@ -8865,7 -8860,7 +8865,7 @@@ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \ si->dst_reg, si->src_reg, \ bpf_target_off(struct sk_reuseport_kern, F, \ - FIELD_SIZEOF(struct sk_reuseport_kern, F), \ + sizeof_member(struct sk_reuseport_kern, F), \ target_size)); \ })
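The many FIELD_SIZEOF() conversions in this file (and in the net/core and net/ipv4 diffs around it) are mechanical: sizeof_member(t, f) is the tree-wide replacement in this merge, with identical semantics, expanding to sizeof(((t *)0)->f). Its typical use is the compile-time width check seen throughout the convert_ctx_access() helpers, for instance:

    /* Fails the build if the two fields ever diverge in width: */
    BUILD_BUG_ON(sizeof_member(struct sk_buff, protocol) !=
                 sizeof_member(struct ethhdr, h_proto));

Because the operand of sizeof is never evaluated, the null-pointer dereference in the expansion is purely a type-level construct.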
diff --combined net/core/flow_dissector.c index d524a693e00f,3f5f61bc2bfa..0764b387943b --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@@ -114,50 -114,19 +114,50 @@@ int skb_flow_dissector_bpf_prog_attach( { struct bpf_prog *attached; struct net *net; + int ret = 0;
net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); + + if (net == &init_net) { + /* BPF flow dissector in the root namespace overrides + * any per-net-namespace one. When attaching to root, + * make sure we don't have any BPF program attached + * to the non-root namespaces. + */ + struct net *ns; + + for_each_net(ns) { + if (ns == &init_net) + continue; + if (rcu_access_pointer(ns->flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + } else { + /* Make sure root flow dissector is not attached + * when attaching to the non-root namespace. + */ + if (rcu_access_pointer(init_net.flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); - if (attached) { - /* Only one BPF program can be attached at a time */ - mutex_unlock(&flow_dissector_mutex); - return -EEXIST; + if (attached == prog) { + /* The same program cannot be attached twice */ + ret = -EINVAL; + goto out; } rcu_assign_pointer(net->flow_dissector_prog, prog); + if (attached) + bpf_prog_put(attached); +out: mutex_unlock(&flow_dissector_mutex); - return 0; + return ret; }
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) @@@ -178,6 -147,27 +178,6 @@@ mutex_unlock(&flow_dissector_mutex); return 0; } -/** - * skb_flow_get_be16 - extract be16 entity - * @skb: sk_buff to extract from - * @poff: offset to extract at - * @data: raw buffer pointer to the packet - * @hlen: packet header length - * - * The function will try to retrieve a be32 entity at - * offset poff - */ -static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, - void *data, int hlen) -{ - __be16 *u, _u; - - u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u); - if (u) - return *u; - - return 0; -}
/** * __skb_flow_get_ports - extract the upper layer ports and return them @@@ -213,72 -203,6 +213,72 @@@ __be32 __skb_flow_get_ports(const struc } EXPORT_SYMBOL(__skb_flow_get_ports);
+static bool icmp_has_id(u8 type)
+{
+	switch (type) {
+	case ICMP_ECHO:
+	case ICMP_ECHOREPLY:
+	case ICMP_TIMESTAMP:
+	case ICMP_TIMESTAMPREPLY:
+	case ICMPV6_ECHO_REQUEST:
+	case ICMPV6_ECHO_REPLY:
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @thoff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+			   struct flow_dissector_key_icmp *key_icmp,
+			   void *data, int thoff, int hlen)
+{
+	struct icmphdr *ih, _ih;
+
+	ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+	if (!ih)
+		return;
+
+	key_icmp->type = ih->type;
+	key_icmp->code = ih->code;
+
+	/* As we use 0 to signal that the Id field is not present,
+	 * avoid confusion with packets without such a field
+	 */
+	if (icmp_has_id(ih->type))
+		key_icmp->id = ih->un.echo.id ? : 1;
+	else
+		key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+				    struct flow_dissector *flow_dissector,
+				    void *target_container,
+				    void *data, int thoff, int hlen)
+{
+	struct flow_dissector_key_icmp *key_icmp;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+		return;
+
+	key_icmp = skb_flow_dissector_target(flow_dissector,
+					     FLOW_DISSECTOR_KEY_ICMP,
+					     target_container);
+
+	skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
 void skb_flow_dissect_meta(const struct sk_buff *skb,
 			   struct flow_dissector *flow_dissector,
 			   void *target_container)
@@@ -599,8 -523,8 +599,8 @@@ __skb_flow_dissect_gre(const struct sk_
 		offset += sizeof(struct gre_base_hdr);
if (hdr->flags & GRE_CSUM) - offset += FIELD_SIZEOF(struct gre_full_hdr, csum) + - FIELD_SIZEOF(struct gre_full_hdr, reserved1); + offset += sizeof_member(struct gre_full_hdr, csum) + + sizeof_member(struct gre_full_hdr, reserved1);
if (hdr->flags & GRE_KEY) { const __be32 *keyid; @@@ -622,11 -546,11 +622,11 @@@ else key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } - offset += FIELD_SIZEOF(struct gre_full_hdr, key); + offset += sizeof_member(struct gre_full_hdr, key); }
if (hdr->flags & GRE_SEQ) - offset += FIELD_SIZEOF(struct pptp_gre_header, seq); + offset += sizeof_member(struct pptp_gre_header, seq);
if (gre_ver == 0) { if (*p_proto == htons(ETH_P_TEB)) { @@@ -653,7 -577,7 +653,7 @@@ u8 *ppp_hdr;
if (hdr->flags & GRE_ACK) - offset += FIELD_SIZEOF(struct pptp_gre_header, ack); + offset += sizeof_member(struct pptp_gre_header, ack);
ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_ppp_hdr), @@@ -759,31 -683,6 +759,31 @@@ __skb_flow_dissect_tcp(const struct sk_ key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF)); }
+static void +__skb_flow_dissect_ports(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int nhoff, + u8 ip_proto, int hlen) +{ + enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; + struct flow_dissector_key_ports *key_ports; + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) + dissector_ports = FLOW_DISSECTOR_KEY_PORTS; + else if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) + dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE; + + if (dissector_ports == FLOW_DISSECTOR_KEY_MAX) + return; + + key_ports = skb_flow_dissector_target(flow_dissector, + dissector_ports, + target_container); + key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); +} + static void __skb_flow_dissect_ipv4(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@@ -953,6 -852,8 +953,6 @@@ bool __skb_flow_dissect(const struct ne struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_ports *key_ports; - struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; struct bpf_prog *attached = NULL; @@@ -969,10 -870,9 +969,10 @@@ nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); #if IS_ENABLED(CONFIG_NET_DSA) - if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) { + if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && + proto == htons(ETH_P_XDSA))) { const struct dsa_device_ops *ops; - int offset; + int offset = 0;
ops = skb->dev->dsa_ptr->tag_ops; if (ops->flow_dissect && @@@ -1010,10 -910,7 +1010,10 @@@ WARN_ON_ONCE(!net); if (net) { rcu_read_lock(); - attached = rcu_dereference(net->flow_dissector_prog); + attached = rcu_dereference(init_net.flow_dissector_prog); + + if (!attached) + attached = rcu_dereference(net->flow_dissector_prog);
if (attached) { struct bpf_flow_keys flow_keys; @@@ -1398,19 -1295,26 +1398,19 @@@ ip_proto_again data, nhoff, hlen); break;
- default: + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + __skb_flow_dissect_icmp(skb, flow_dissector, target_container, + data, nhoff, hlen); break; - }
- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && - !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { - key_ports = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS, - target_container); - key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + default: + break; }
- if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ICMP)) { - key_icmp = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_ICMP, - target_container); - key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen); - } + if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT)) + __skb_flow_dissect_ports(skb, flow_dissector, target_container, + data, nhoff, ip_proto, hlen);
/* Process result of IP proto processing */ switch (fdret) { @@@ -1446,23 -1350,32 +1446,23 @@@ out_bad } EXPORT_SYMBOL(__skb_flow_dissect);
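With the dissector rework above, ICMP gets a dedicated key instead of the old generic 16-bit read: type, code and, for the message types that actually carry one, the echo Identifier, with an on-the-wire Id of 0 remapped to 1 so that a key id of 0 can unambiguously mean "no Id field". A hedged consumer sketch, assuming a dissector configured with FLOW_DISSECTOR_KEY_ICMP:

    struct flow_dissector_key_icmp *icmp;

    icmp = skb_flow_dissector_target(flow_dissector,
                                     FLOW_DISSECTOR_KEY_ICMP,
                                     target_container);
    if (icmp->id)   /* 0: this ICMP type has no Identifier field */
            pr_debug("icmp type %u code %u id %u\n",
                     icmp->type, icmp->code, icmp->id);

The Identifier is what lets echo request/reply pairs hash to the same flow, which the old type/code-only key could not express.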
-static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); }
-static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) -{ - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; }
static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); + BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); - BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != - sizeof(*flow) - sizeof(flow->addrs));
switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: @@@ -1475,7 -1388,7 +1475,7 @@@ diff -= sizeof(flow->addrs.tipckey); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return sizeof(*flow) - diff; }
__be32 flow_get_u32_src(const struct flow_keys *flow) @@@ -1508,9 -1421,6 +1508,9 @@@ __be32 flow_get_u32_dst(const struct fl } EXPORT_SYMBOL(flow_get_u32_dst);
+/* Sort the source and destination IPs (and the ports if the IPs are the same)
+ * to have a consistent hash in both directions.
+ */
 static inline void __flow_hash_consistentify(struct flow_keys *keys)
 {
 	int addr_diff, i;
@@@ -1544,15 -1454,14 +1544,15 @@@
 	}
 }
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash;
__flow_hash_consistentify(keys);
- hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1;
@@@ -1562,13 -1471,12 +1562,13 @@@ u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys);
static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@@ -1616,7 -1524,7 +1616,7 @@@ u32 __skb_get_hash_symmetric(const stru &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
- return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
@@@ -1636,14 -1544,13 +1636,14 @@@ void __skb_get_hash(struct sk_buff *skb
__flow_hash_secret_init();
- hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd);
__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash);
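The hashing rework above trades jhash with a 32-bit seed for SipHash with a full 128-bit siphash_key_t. jhash is not a keyed PRF: an off-path observer who learns a handful of hash outputs can brute-force the seed and then predict flow placement, and SipHash was designed precisely to close that class of attack. The hot path now reduces to (a condensed restatement of __flow_hash_from_keys() above):

    static siphash_key_t hashrnd __read_mostly;  /* net_get_random_once() */

    static u32 flow_hash(const struct flow_keys *keys)
    {
            u32 hash = siphash(flow_keys_hash_start(keys),
                               flow_keys_hash_length(keys), &hashrnd);

            return hash ?: 1;   /* 0 is reserved for "no hash computed" */
    }

Note that flow_keys_hash_length() now returns a byte count, since siphash() takes bytes where jhash2() counted u32 words; the BUILD_BUG_ON()s above keep the hashed span properly aligned and sized.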
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys;
diff --combined net/core/skbuff.c index 973a71f4bc89,6054465875d8..2a4637d4ecff --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@@ -4148,7 -4148,7 +4148,7 @@@ void __init skb_init(void 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, offsetof(struct sk_buff, cb), - sizeof_field(struct sk_buff, cb), + sizeof_member(struct sk_buff, cb), NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), @@@ -4415,7 -4415,7 +4415,7 @@@ static void skb_set_err_queue(struct sk int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) + (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM;
skb_orphan(skb); @@@ -5477,14 -5477,12 +5477,14 @@@ static void skb_mod_eth_type(struct sk_ * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) + * @mac_len: length of the MAC header * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ -int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto) +int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, + int mac_len, bool ethernet) { struct mpls_shim_hdr *lse; int err; @@@ -5501,21 -5499,21 +5501,21 @@@ return err;
if (!skb->inner_protocol) { - skb_set_inner_network_header(skb, skb->mac_len); + skb_set_inner_network_header(skb, mac_len); skb_set_inner_protocol(skb, skb->protocol); }
skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), - skb->mac_len); + mac_len); skb_reset_mac_header(skb); - skb_set_network_header(skb, skb->mac_len); + skb_set_network_header(skb, mac_len);
lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN);
- if (skb->dev && skb->dev->type == ARPHRD_ETHER) + if (ethernet) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto;
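skb_mpls_push() here, and skb_mpls_pop() in the next hunk, now take the MAC header length and an explicit ethernet flag instead of trusting skb->mac_len and dev->type, so callers such as the TC MPLS action can operate on packets whose mac_len does not describe a full Ethernet header. A hedged caller sketch, composing a bottom-of-stack label stack entry by hand (label and ttl are illustrative values):

    u32 label = 100, ttl = 64;
    __be32 lse = cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
                             (ttl << MPLS_LS_TTL_SHIFT) |
                             MPLS_LS_S_MASK);
    int err = skb_mpls_push(skb, lse, htons(ETH_P_MPLS_UC),
                            skb->mac_len, true);

Passing false for ethernet skips the ethertype rewrite, which is what an L3 tunnel device without a MAC header wants.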
@@@ -5528,34 -5526,31 +5528,34 @@@ EXPORT_SYMBOL_GPL(skb_mpls_push) * * @skb: buffer * @next_proto: ethertype of header after popped MPLS header + * @mac_len: length of the MAC header + * @ethernet: flag to indicate if ethernet header is present in packet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ -int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto) +int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, + bool ethernet) { int err;
if (unlikely(!eth_p_mpls(skb->protocol))) - return -EINVAL; + return 0;
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); + err = skb_ensure_writable(skb, mac_len + MPLS_HLEN); if (unlikely(err)) return err;
skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), - skb->mac_len); + mac_len);
__skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); - skb_set_network_header(skb, skb->mac_len); + skb_set_network_header(skb, mac_len);
- if (skb->dev && skb->dev->type == ARPHRD_ETHER) { + if (ethernet) { struct ethhdr *hdr;
/* use mpls_hdr() to get ethertype to account for VLANs. */ diff --combined net/core/xdp.c index 7c8390ad4dc6,621d6148b07a..9040a8f308c8 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@@ -36,7 -36,7 +36,7 @@@ static u32 xdp_mem_id_hashfn(const voi const u32 *k = data; const u32 key = *k;
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) + BUILD_BUG_ON(sizeof_member(struct xdp_mem_allocator, mem.id) != sizeof(u32));
/* Use cyclic increasing ID as direct hash key */ @@@ -56,7 -56,7 +56,7 @@@ static const struct rhashtable_params m .nelem_hint = 64, .head_offset = offsetof(struct xdp_mem_allocator, node), .key_offset = offsetof(struct xdp_mem_allocator, mem.id), - .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id), + .key_len = sizeof_member(struct xdp_mem_allocator, mem.id), .max_size = MEM_ID_MAX, .min_size = 8, .automatic_shrinking = true, @@@ -70,63 -70,77 +70,63 @@@ static void __xdp_mem_allocator_rcu_fre
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
- /* Allocator have indicated safe to remove before this is called */ - if (xa->mem.type == MEM_TYPE_PAGE_POOL) - page_pool_free(xa->page_pool); - /* Allow this ID to be reused */ ida_simple_remove(&mem_id_pool, xa->mem.id);
- /* Poison memory */ - xa->mem.id = 0xFFFF; - xa->mem.type = 0xF0F0; - xa->allocator = (void *)0xDEAD9001; - kfree(xa); }
-static bool __mem_id_disconnect(int id, bool force) +static void mem_xa_remove(struct xdp_mem_allocator *xa) +{ + trace_mem_disconnect(xa); + + if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) + call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); +} + +static void mem_allocator_disconnect(void *allocator) { struct xdp_mem_allocator *xa; - bool safe_to_remove = true; + struct rhashtable_iter iter;
mutex_lock(&mem_id_lock);
- xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); - if (!xa) { - mutex_unlock(&mem_id_lock); - WARN(1, "Request remove non-existing id(%d), driver bug?", id); - return true; - } - xa->disconnect_cnt++; + rhashtable_walk_enter(mem_id_ht, &iter); + do { + rhashtable_walk_start(&iter);
- /* Detects in-flight packet-pages for page_pool */ - if (xa->mem.type == MEM_TYPE_PAGE_POOL) - safe_to_remove = page_pool_request_shutdown(xa->page_pool); + while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) { + if (xa->allocator == allocator) + mem_xa_remove(xa); + }
- trace_mem_disconnect(xa, safe_to_remove, force); + rhashtable_walk_stop(&iter);
- if ((safe_to_remove || force) && - !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) - call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); + } while (xa == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter);
mutex_unlock(&mem_id_lock); - return (safe_to_remove|force); }
-#define DEFER_TIME (msecs_to_jiffies(1000)) -#define DEFER_WARN_INTERVAL (30 * HZ) -#define DEFER_MAX_RETRIES 120 - -static void mem_id_disconnect_defer_retry(struct work_struct *wq) +static void mem_id_disconnect(int id) { - struct delayed_work *dwq = to_delayed_work(wq); - struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq); - bool force = false; + struct xdp_mem_allocator *xa;
- if (xa->disconnect_cnt > DEFER_MAX_RETRIES) - force = true; + mutex_lock(&mem_id_lock);
- if (__mem_id_disconnect(xa->mem.id, force)) + xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); + if (!xa) { + mutex_unlock(&mem_id_lock); + WARN(1, "Request remove non-existing id(%d), driver bug?", id); return; + }
- /* Periodic warning */ - if (time_after_eq(jiffies, xa->defer_warn)) { - int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ; + trace_mem_disconnect(xa);
- pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n", - __func__, xa->mem.id, xa->disconnect_cnt, sec); - xa->defer_warn = jiffies + DEFER_WARN_INTERVAL; - } + if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) + call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
- /* Still not ready to be disconnected, retry later */ - schedule_delayed_work(&xa->defer_wq, DEFER_TIME); + mutex_unlock(&mem_id_lock); }
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) @@@ -139,21 -153,38 +139,21 @@@ return; }
- if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL && - xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) { - return; - } - if (id == 0) return;
- if (__mem_id_disconnect(id, false)) - return; - - /* Could not disconnect, defer new disconnect attempt to later */ - mutex_lock(&mem_id_lock); + if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY) + return mem_id_disconnect(id);
- xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); - if (!xa) { - mutex_unlock(&mem_id_lock); - return; + if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) { + rcu_read_lock(); + xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params); + page_pool_destroy(xa->page_pool); + rcu_read_unlock(); } - xa->defer_start = jiffies; - xa->defer_warn = jiffies + DEFER_WARN_INTERVAL; - - INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry); - mutex_unlock(&mem_id_lock); - schedule_delayed_work(&xa->defer_wq, DEFER_TIME); } EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
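The deferred-workqueue shutdown dance could be removed because page_pool now tracks in-flight pages itself; mem_allocator_disconnect() instead walks the ID hash table directly and removes every entry owned by the allocator being torn down. The resize-safe rhashtable iteration idiom it uses, with visit() as a hypothetical per-entry hook:

    struct rhashtable_iter iter;
    struct xdp_mem_allocator *xa;

    rhashtable_walk_enter(mem_id_ht, &iter);
    do {
            rhashtable_walk_start(&iter);

            while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa))
                    visit(xa);

            rhashtable_walk_stop(&iter);
    } while (xa == ERR_PTR(-EAGAIN));   /* table resized mid-walk: restart */
    rhashtable_walk_exit(&iter);

rhashtable_walk_next() returns ERR_PTR(-EAGAIN) when a concurrent resize invalidated the walk, so the loop simply starts over; entries already removed stay removed, making the retry idempotent.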
-/* This unregister operation will also cleanup and destroy the - * allocator. The page_pool_free() operation is first called when it's - * safe to remove, possibly deferred to a workqueue. - */ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) { /* Simplify driver cleanup code paths, allow unreg "unused" */ @@@ -340,7 -371,7 +340,7 @@@ int xdp_rxq_info_reg_mem_model(struct x }
if (type == MEM_TYPE_PAGE_POOL) - page_pool_get(xdp_alloc->page_pool); + page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
mutex_unlock(&mem_id_lock);
@@@ -355,7 -386,7 +355,7 @@@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_
 /* XDP RX runs under NAPI protection, and in different delivery error
  * scenarios (e.g. queue full), it is possible to return the xdp_frame
- * while still leveraging this protection. The @napi_direct boolian
+ * while still leveraging this protection. The @napi_direct boolean
  * is used for those call sites. Thus, allowing for faster recycling
  * of xdp_frames/pages in those cases.
  */
@@@ -371,8 -402,15 +371,8 @@@ static void __xdp_return(void *data, st
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
-		if (likely(xa)) {
-			napi_direct &= !xdp_return_frame_no_direct();
-			page_pool_put_page(xa->page_pool, page, napi_direct);
-		} else {
-			/* Hopefully stack show who to blame for late return */
-			WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
-			trace_mem_return_failed(mem, page);
-			put_page(page);
-		}
+		napi_direct &= !xdp_return_frame_no_direct();
+		page_pool_put_page(xa->page_pool, page, napi_direct);
 		rcu_read_unlock();
 		break;
 	case MEM_TYPE_PAGE_SHARED:
- sk->sk_max_ack_backlog = backlog; + WRITE_ONCE(sk->sk_max_ack_backlog, backlog); /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ @@@ -1132,7 -1132,7 +1132,7 @@@ static int __init dccp_init(void int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_member(struct sk_buff, cb)); rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; diff --combined net/ipv4/ip_gre.c index 572b6307a2df,9c9716f7ccdb..2c8abf9dbb34 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@@ -340,8 -340,6 +340,8 @@@ static int __ipgre_rcv(struct sk_buff * iph->saddr, iph->daddr, tpi->key);
if (tunnel) { + const struct iphdr *tnl_params; + if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, raw_proto, false) < 0) goto drop; @@@ -350,9 -348,7 +350,9 @@@ skb_pop_mac_header(skb); else skb_reset_mac_header(skb); - if (tunnel->collect_md) { + + tnl_params = &tunnel->parms.iph; + if (tunnel->collect_md || tnl_params->daddr == 0) { __be16 flags; __be64 tun_id;
@@@ -513,9 -509,9 +513,9 @@@ static void erspan_fb_xmit(struct sk_bu key = &tun_info->key; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) goto err_free_skb; - md = ip_tunnel_info_opts(tun_info); - if (!md) + if (tun_info->options_len < sizeof(*md)) goto err_free_skb; + md = ip_tunnel_info_opts(tun_info);
/* ERSPAN has fixed 8 byte GRE header */ version = md->version; @@@ -1464,8 -1460,8 +1464,8 @@@ static const struct nla_policy ipgre_po [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_IKEY] = { .type = NLA_U32 }, [IFLA_GRE_OKEY] = { .type = NLA_U32 }, - [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, - [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GRE_LOCAL] = { .len = sizeof_member(struct iphdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = sizeof_member(struct iphdr, daddr) }, [IFLA_GRE_TTL] = { .type = NLA_U8 }, [IFLA_GRE_TOS] = { .type = NLA_U8 }, [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, diff --combined net/ipv4/tcp.c index 8a39ee794891,7d23503a1d2d..ef7c0693ad79 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -326,7 -326,7 +326,7 @@@ void tcp_enter_memory_pressure(struct s { unsigned long val;
- if (tcp_memory_pressure) + if (READ_ONCE(tcp_memory_pressure)) return; val = jiffies;
@@@ -341,7 -341,7 +341,7 @@@ void tcp_leave_memory_pressure(struct s { unsigned long val;
- if (!tcp_memory_pressure) + if (!READ_ONCE(tcp_memory_pressure)) return; val = xchg(&tcp_memory_pressure, 0); if (val) @@@ -450,8 -450,8 +450,8 @@@ void tcp_init_sock(struct sock *sk
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; + WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); + WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; @@@ -477,7 -477,7 +477,7 @@@ static void tcp_tx_timestamp(struct soc static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, int target, struct sock *sk) { - return (tp->rcv_nxt - tp->copied_seq >= target) || + return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) || (sk->sk_prot->stream_memory_read ? sk->sk_prot->stream_memory_read(sk) : false); } @@@ -543,10 -543,10 +543,10 @@@ __poll_t tcp_poll(struct file *file, st
/* Connected or passive Fast Open socket? */ if (state != TCP_SYN_SENT && - (state != TCP_SYN_RECV || tp->fastopen_rsk)) { + (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX);
- if (tp->urg_seq == tp->copied_seq && + if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) target++; @@@ -584,7 -584,7 +584,7 @@@ } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR;
return mask; @@@ -607,8 -607,7 +607,8 @@@ int tcp_ioctl(struct sock *sk, int cmd unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = tp->urg_data && tp->urg_seq == tp->copied_seq; + answ = tp->urg_data && + READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: if (sk->sk_state == TCP_LISTEN) @@@ -617,7 -616,7 +617,7 @@@ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = tp->write_seq - tp->snd_una; + answ = READ_ONCE(tp->write_seq) - tp->snd_una; break; case SIOCOUTQNSD: if (sk->sk_state == TCP_LISTEN) @@@ -626,8 -625,7 +626,8 @@@ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = tp->write_seq - tp->snd_nxt; + answ = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); break; default: return -ENOIOCTLCMD; @@@ -659,7 -657,7 +659,7 @@@ static void skb_entail(struct sock *sk tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; @@@ -1034,10 -1032,10 +1034,10 @@@ new_segment skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; - tp->write_seq += copy; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0);
@@@ -1364,7 -1362,7 +1364,7 @@@ new_segment if (!copied) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
- tp->write_seq += copy; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0);
@@@ -1670,9 -1668,9 +1670,9 @@@ int tcp_read_sock(struct sock *sk, read sk_eat_skb(sk, skb); if (!desc->count) break; - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); } - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
@@@ -1701,7 -1699,7 +1701,7 @@@ int tcp_set_rcvlowat(struct sock *sk, i else cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; val = min(val, cap); - sk->sk_rcvlowat = val ? : 1; + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
/* Check if we need to signal EPOLLIN right now */ tcp_data_ready(sk); @@@ -1711,7 -1709,7 +1711,7 @@@
val <<= 1; if (val > sk->sk_rcvbuf) { - sk->sk_rcvbuf = val; + WRITE_ONCE(sk->sk_rcvbuf, val); tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); } return 0; @@@ -1741,8 -1739,8 +1741,8 @@@ static int tcp_zerocopy_receive(struct struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; + u32 length = 0, seq, offset, zap_len; const skb_frag_t *frags = NULL; - u32 length = 0, seq, offset; struct vm_area_struct *vma; struct sk_buff *skb = NULL; struct tcp_sock *tp; @@@ -1769,12 -1767,12 +1769,12 @@@ seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); - zc->length &= ~(PAGE_SIZE - 1); - if (zc->length) { - zap_page_range(vma, address, zc->length); + zap_len = zc->length & ~(PAGE_SIZE - 1); + if (zap_len) { + zap_page_range(vma, address, zap_len); zc->recv_skip_hint = 0; } else { - zc->recv_skip_hint = inq; + zc->recv_skip_hint = zc->length; } ret = 0; while (length + PAGE_SIZE <= zc->length) { @@@ -1821,7 -1819,7 +1821,7 @@@ out: up_read(¤t->mm->mmap_sem); if (length) { - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */ @@@ -1864,33 -1862,29 +1864,33 @@@ static void tcp_recv_timestamp(struct m if (sock_flag(sk, SOCK_RCVTSTAMP)) { if (sock_flag(sk, SOCK_RCVTSTAMPNS)) { if (new_tstamp) { - struct __kernel_timespec kts = {tss->ts[0].tv_sec, tss->ts[0].tv_nsec}; - + struct __kernel_timespec kts = { + .tv_sec = tss->ts[0].tv_sec, + .tv_nsec = tss->ts[0].tv_nsec, + }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, sizeof(kts), &kts); } else { - struct timespec ts_old = timespec64_to_timespec(tss->ts[0]); - + struct __kernel_old_timespec ts_old = { + .tv_sec = tss->ts[0].tv_sec, + .tv_nsec = tss->ts[0].tv_nsec, + }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, sizeof(ts_old), &ts_old); } } else { if (new_tstamp) { - struct __kernel_sock_timeval stv; - - stv.tv_sec = tss->ts[0].tv_sec; - stv.tv_usec = tss->ts[0].tv_nsec / 1000; + struct __kernel_sock_timeval stv = { + .tv_sec = tss->ts[0].tv_sec, + .tv_usec = tss->ts[0].tv_nsec / 1000, + }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, sizeof(stv), &stv); } else { - struct __kernel_old_timeval tv; - - tv.tv_sec = tss->ts[0].tv_sec; - tv.tv_usec = tss->ts[0].tv_nsec / 1000; + struct __kernel_old_timeval tv = { + .tv_sec = tss->ts[0].tv_sec, + .tv_usec = tss->ts[0].tv_nsec / 1000, + }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, sizeof(tv), &tv); } @@@ -1962,12 -1956,13 +1962,12 @@@ int tcp_recvmsg(struct sock *sk, struc struct sk_buff *skb, *last; u32 urg_hole = 0; struct scm_timestamping_internal tss; - bool has_tss = false; - bool has_cmsg; + int cmsg_flags;
if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len);
- if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock);
@@@ -1977,7 -1972,7 +1977,7 @@@ if (sk->sk_state == TCP_LISTEN) goto out;
- has_cmsg = tp->recvmsg_inq; + cmsg_flags = tp->recvmsg_inq ? 1 : 0; timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */ @@@ -2050,7 -2045,7 +2050,7 @@@
/* Well, if we have backlog, try to process it now yet. */
- if (copied >= target && !sk->sk_backlog.tail) + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break;
if (copied) { @@@ -2122,7 -2117,7 +2122,7 @@@ found_ok_skb if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { - ++*seq; + WRITE_ONCE(*seq, *seq + 1); urg_hole++; offset++; used--; @@@ -2144,7 -2139,7 +2144,7 @@@ } }
- *seq += used; + WRITE_ONCE(*seq, *seq + used); copied += used; len -= used;
@@@ -2160,7 -2155,8 +2160,7 @@@ skip_copy
if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, &tss); - has_tss = true; - has_cmsg = true; + cmsg_flags |= 2; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; @@@ -2170,7 -2166,7 +2170,7 @@@
found_fin_ok: /* Process the FIN. */ - ++*seq; + WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; @@@ -2185,10 -2181,10 +2185,10 @@@
release_sock(sk);
- if (has_cmsg) { - if (has_tss) + if (cmsg_flags) { + if (cmsg_flags & 2) tcp_recv_timestamp(msg, sk, &tss); - if (tp->recvmsg_inq) { + if (cmsg_flags & 1) { inq = tcp_inq_hint(sk); put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); } @@@ -2491,10 -2487,7 +2491,10 @@@ adjudge_to_death }
if (sk->sk_state == TCP_CLOSE) { - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + struct request_sock *req; + + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); /* We could get here with a non-NULL req if the socket is * aborted (e.g., closed with unread data) before 3WHS * finishes. @@@ -2566,7 -2559,6 +2566,7 @@@ int tcp_disconnect(struct sock *sk, in struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int old_state = sk->sk_state; + u32 seq;
if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); @@@ -2593,7 -2585,7 +2593,7 @@@ __kfree_skb(sk->sk_rx_skb_cache); sk->sk_rx_skb_cache = NULL; } - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->urg_data = 0; tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); @@@ -2609,12 -2601,9 +2609,12 @@@ tp->srtt_us = 0; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->rcv_rtt_last_tsecr = 0; - tp->write_seq += tp->max_window + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; + + seq = tp->write_seq + tp->max_window + 2; + if (!seq) + seq = 1; + WRITE_ONCE(tp->write_seq, seq); + icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; @@@ -2668,7 -2657,6 +2668,7 @@@ /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); inet->defer_connect = 0; + tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
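The sk_sndbuf/sk_rcvbuf, write_seq, copied_seq, rcv_nxt and urg_seq conversions in this file all follow one rule: a field that lockless readers (poll, ioctl, diag) may observe gets WRITE_ONCE() on the locked writer side and READ_ONCE() at the lockless read sites, which documents the data race and prevents load/store tearing. The pairing, abstracted from the hunks above:

	/* writer, under the socket lock */
	WRITE_ONCE(tp->write_seq, tp->write_seq + copy);

	/* lockless reader, e.g. an ioctl or diag handler */
	answ = READ_ONCE(tp->write_seq) - tp->snd_una;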
@@@ -2942,9 -2930,9 +2942,9 @@@ static int do_tcp_setsockopt(struct soc if (sk->sk_state != TCP_CLOSE) err = -EPERM; else if (tp->repair_queue == TCP_SEND_QUEUE) - tp->write_seq = val; + WRITE_ONCE(tp->write_seq, val); else if (tp->repair_queue == TCP_RECV_QUEUE) - tp->rcv_nxt = val; + WRITE_ONCE(tp->rcv_nxt, val); else err = -EINVAL; break; @@@ -3227,8 -3215,8 +3227,8 @@@ void tcp_get_info(struct sock *sk, stru * tcpi_unacked -> Number of children ready for accept() * tcpi_sacked -> max backlog */ - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; + info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog); + info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog); return; }
@@@ -3308,7 -3296,6 +3308,7 @@@ info->tcpi_reord_seen = tp->reord_seen; info->tcpi_rcv_ooopack = tp->rcv_ooopack; info->tcpi_snd_wnd = tp->snd_wnd; + info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@@ -3844,13 -3831,7 +3844,13 @@@ EXPORT_SYMBOL(tcp_md5_hash_key)
void tcp_done(struct sock *sk) { - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + struct request_sock *req; + + /* We might be called with a new socket, after + * inet_csk_prepare_forced_close() has been called + * so we can not use lockdep_sock_is_held(sk) + */ + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); @@@ -3949,7 -3930,7 +3949,7 @@@ void __init tcp_init(void
BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_member(struct sk_buff, cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); diff --combined net/ipv6/ip6_gre.c index 923034c52ce4,be1a8c823b2a..f20561b7f667 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@@ -980,9 -980,9 +980,9 @@@ static netdev_tx_t ip6erspan_tunnel_xmi dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) goto tx_err; - md = ip_tunnel_info_opts(tun_info); - if (!md) + if (tun_info->options_len < sizeof(*md)) goto tx_err; + md = ip_tunnel_info_opts(tun_info);
tun_id = tunnel_id_to_key32(key->tun_id); if (md->version == 1) { @@@ -2170,8 -2170,8 +2170,8 @@@ static const struct nla_policy ip6gre_p [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_IKEY] = { .type = NLA_U32 }, [IFLA_GRE_OKEY] = { .type = NLA_U32 }, - [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, - [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_LOCAL] = { .len = sizeof_member(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = sizeof_member(struct ipv6hdr, daddr) }, [IFLA_GRE_TTL] = { .type = NLA_U8 }, [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, @@@ -2192,7 -2192,6 +2192,7 @@@ static void ip6erspan_tap_setup(struct { ether_setup(dev);
+ dev->max_mtu = 0; dev->netdev_ops = &ip6erspan_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; diff --combined net/ipv6/raw.c index dfe5e603ffe1,664b65139816..4621c20352e8 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@@ -925,7 -925,7 +925,7 @@@ static int rawv6_sendmsg(struct sock *s
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; @@@ -1294,7 -1294,7 +1294,7 @@@ struct proto rawv6_prot = .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw6_sock), .useroffset = offsetof(struct raw6_sock, filter), - .usersize = sizeof_field(struct raw6_sock, filter), + .usersize = sizeof_member(struct raw6_sock, filter), .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_rawv6_setsockopt, diff --combined net/netfilter/nf_tables_api.c index 062b73a83af0,ec252362d2ab..d0256d54d47f --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -151,64 -151,11 +151,64 @@@ static void nft_set_trans_bind(const st } }
+static int nft_netdev_register_hooks(struct net *net, + struct list_head *hook_list) +{ + struct nft_hook *hook; + int err, j; + + j = 0; + list_for_each_entry(hook, hook_list, list) { + err = nf_register_net_hook(net, &hook->ops); + if (err < 0) + goto err_register; + + j++; + } + return 0; + +err_register: + list_for_each_entry(hook, hook_list, list) { + if (j-- <= 0) + break; + + nf_unregister_net_hook(net, &hook->ops); + } + return err; +} + +static void nft_netdev_unregister_hooks(struct net *net, + struct list_head *hook_list) +{ + struct nft_hook *hook; + + list_for_each_entry(hook, hook_list, list) + nf_unregister_net_hook(net, &hook->ops); +} + +static int nft_register_basechain_hooks(struct net *net, int family, + struct nft_base_chain *basechain) +{ + if (family == NFPROTO_NETDEV) + return nft_netdev_register_hooks(net, &basechain->hook_list); + + return nf_register_net_hook(net, &basechain->ops); +} + +static void nft_unregister_basechain_hooks(struct net *net, int family, + struct nft_base_chain *basechain) +{ + if (family == NFPROTO_NETDEV) + nft_netdev_unregister_hooks(net, &basechain->hook_list); + else + nf_unregister_net_hook(net, &basechain->ops); +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { - const struct nft_base_chain *basechain; + struct nft_base_chain *basechain; const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT || @@@ -221,14 -168,14 +221,14 @@@ if (basechain->type->ops_register) return basechain->type->ops_register(net, ops);
- return nf_register_net_hook(net, ops); + return nft_register_basechain_hooks(net, table->family, basechain); }
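nft_netdev_register_hooks() above uses the standard partial-failure unwind: count how many hooks registered successfully, and on error walk the list again unregistering exactly that many. Restated compactly (a sketch with the same shape as the added code):

	int err, done = 0;

	list_for_each_entry(hook, hook_list, list) {
		err = nf_register_net_hook(net, &hook->ops);
		if (err < 0)
			goto unwind;
		done++;
	}
	return 0;

unwind:
	list_for_each_entry(hook, hook_list, list) {
		if (done-- <= 0)
			break;
		nf_unregister_net_hook(net, &hook->ops);
	}
	return err;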
static void nf_tables_unregister_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { - const struct nft_base_chain *basechain; + struct nft_base_chain *basechain; const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT || @@@ -240,7 -187,7 +240,7 @@@ if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops);
- nf_unregister_net_hook(net, ops); + nft_unregister_basechain_hooks(net, table->family, basechain); }
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@@ -361,7 -308,6 +361,7 @@@ static struct nft_trans *nft_trans_rule
static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) { + struct nft_flow_rule *flow; struct nft_trans *trans; int err;
@@@ -369,16 -315,6 +369,16 @@@ if (trans == NULL) return -ENOMEM;
+ if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) { + flow = nft_flow_rule_create(ctx->net, rule); + if (IS_ERR(flow)) { + nft_trans_destroy(trans); + return PTR_ERR(flow); + } + + nft_trans_flow_rule(trans) = flow; + } + err = nf_tables_delrule_deactivate(ctx, rule); if (err < 0) { nft_trans_destroy(trans); @@@ -806,8 -742,7 +806,8 @@@ static void nft_table_disable(struct ne if (cnt && i++ == cnt) break;
- nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); + nft_unregister_basechain_hooks(net, table->family, + nft_base_chain(chain)); } }
@@@ -822,16 -757,14 +822,16 @@@ static int nf_tables_table_enable(struc if (!nft_is_base_chain(chain)) continue;
- err = nf_register_net_hook(net, &nft_base_chain(chain)->ops); + err = nft_register_basechain_hooks(net, table->family, + nft_base_chain(chain)); if (err < 0) - goto err; + goto err_register_hooks;
i++; } return 0; -err: + +err_register_hooks: if (i) nft_table_disable(net, table, i); return err; @@@ -1292,46 -1225,6 +1292,46 @@@ nla_put_failure return -ENOSPC; }
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family, + const struct nft_base_chain *basechain) +{ + const struct nf_hook_ops *ops = &basechain->ops; + struct nft_hook *hook, *first = NULL; + struct nlattr *nest, *nest_devs; + int n = 0; + + nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); + if (nest == NULL) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) + goto nla_put_failure; + + if (family == NFPROTO_NETDEV) { + nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); + list_for_each_entry(hook, &basechain->hook_list, list) { + if (!first) + first = hook; + + if (nla_put_string(skb, NFTA_DEVICE_NAME, + hook->ops.dev->name)) + goto nla_put_failure; + n++; + } + nla_nest_end(skb, nest_devs); + + if (n == 1 && + nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + + return 0; +nla_put_failure: + return -1; +} + static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, @@@ -1360,10 -1253,21 +1360,10 @@@
if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); - const struct nf_hook_ops *ops = &basechain->ops; struct nft_stats __percpu *stats; - struct nlattr *nest;
- nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); - if (nest == NULL) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) - goto nla_put_failure; - if (basechain->dev_name[0] && - nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name)) + if (nft_dump_basechain_hook(skb, family, basechain)) goto nla_put_failure; - nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY, htonl(basechain->policy))) @@@ -1557,9 -1461,8 +1557,9 @@@ static void nft_chain_stats_replace(str if (!nft_trans_chain_stats(trans)) return;
- rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans), - lockdep_commit_lock_is_held(trans->ctx.net)); + nft_trans_chain_stats(trans) = + rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans), + lockdep_commit_lock_is_held(trans->ctx.net));
if (!nft_trans_chain_stats(trans)) static_branch_inc(&nft_counters_enabled); @@@ -1582,7 -1485,6 +1582,7 @@@ static void nf_tables_chain_free_chain_ static void nf_tables_chain_destroy(struct nft_ctx *ctx) { struct nft_chain *chain = ctx->chain; + struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0)) return; @@@ -1593,13 -1495,6 +1593,13 @@@ if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain);
+ if (ctx->family == NFPROTO_NETDEV) { + list_for_each_entry_safe(hook, next, + &basechain->hook_list, list) { + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); + } + } module_put(basechain->type->owner); if (rcu_access_pointer(basechain->stats)) { static_branch_dec(&nft_counters_enabled); @@@ -1613,125 -1508,13 +1613,125 @@@ } }
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net, + const struct nlattr *attr) +{ + struct net_device *dev; + char ifname[IFNAMSIZ]; + struct nft_hook *hook; + int err; + + hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL); + if (!hook) { + err = -ENOMEM; + goto err_hook_alloc; + } + + nla_strlcpy(ifname, attr, IFNAMSIZ); + dev = __dev_get_by_name(net, ifname); + if (!dev) { + err = -ENOENT; + goto err_hook_dev; + } + hook->ops.dev = dev; + + return hook; + +err_hook_dev: + kfree(hook); +err_hook_alloc: + return ERR_PTR(err); +} + +static bool nft_hook_list_find(struct list_head *hook_list, + const struct nft_hook *this) +{ + struct nft_hook *hook; + + list_for_each_entry(hook, hook_list, list) { + if (this->ops.dev == hook->ops.dev) + return true; + } + + return false; +} + +static int nf_tables_parse_netdev_hooks(struct net *net, + const struct nlattr *attr, + struct list_head *hook_list) +{ + struct nft_hook *hook, *next; + const struct nlattr *tmp; + int rem, n = 0, err; + + nla_for_each_nested(tmp, attr, rem) { + if (nla_type(tmp) != NFTA_DEVICE_NAME) { + err = -EINVAL; + goto err_hook; + } + + hook = nft_netdev_hook_alloc(net, tmp); + if (IS_ERR(hook)) { + err = PTR_ERR(hook); + goto err_hook; + } + if (nft_hook_list_find(hook_list, hook)) { + err = -EEXIST; + goto err_hook; + } + list_add_tail(&hook->list, hook_list); + n++; + + if (n == NFT_NETDEVICE_MAX) { + err = -EFBIG; + goto err_hook; + } + } + if (!n) + return -EINVAL; + + return 0; + +err_hook: + list_for_each_entry_safe(hook, next, hook_list, list) { + list_del(&hook->list); + kfree(hook); + } + return err; +} + struct nft_chain_hook { u32 num; s32 priority; const struct nft_chain_type *type; - struct net_device *dev; + struct list_head list; };
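nf_tables_parse_netdev_hooks() above builds the hook list from a nested array of NFTA_DEVICE_NAME attributes, rejecting duplicates via nft_hook_list_find() and capping the list at NFT_NETDEVICE_MAX. The membership test it relies on boils down to comparing the bound net_device pointers, roughly:

	/* true if 'dev' is already bound by some hook on the list */
	static bool hook_list_has_dev(struct list_head *hook_list,
				      const struct net_device *dev)
	{
		struct nft_hook *hook;

		list_for_each_entry(hook, hook_list, list)
			if (hook->ops.dev == dev)
				return true;
		return false;
	}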
+static int nft_chain_parse_netdev(struct net *net, + struct nlattr *tb[], + struct list_head *hook_list) +{ + struct nft_hook *hook; + int err; + + if (tb[NFTA_HOOK_DEV]) { + hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]); + if (IS_ERR(hook)) + return PTR_ERR(hook); + + list_add_tail(&hook->list, hook_list); + } else if (tb[NFTA_HOOK_DEVS]) { + err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS], + hook_list); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + static int nft_chain_parse_hook(struct net *net, const struct nlattr * const nla[], struct nft_chain_hook *hook, u8 family, @@@ -1739,6 -1522,7 +1739,6 @@@ { struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; - struct net_device *dev; int err;
lockdep_assert_held(&net->nft.commit_mutex); @@@ -1776,14 -1560,23 +1776,14 @@@
hook->type = type;
- hook->dev = NULL; + INIT_LIST_HEAD(&hook->list); if (family == NFPROTO_NETDEV) { - char ifname[IFNAMSIZ]; - - if (!ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } - - nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); - dev = __dev_get_by_name(net, ifname); - if (!dev) { + err = nft_chain_parse_netdev(net, ha, &hook->list); + if (err < 0) { module_put(type->owner); - return -ENOENT; + return err; } - hook->dev = dev; - } else if (ha[NFTA_HOOK_DEV]) { + } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) { module_put(type->owner); return -EOPNOTSUPP; } @@@ -1793,12 -1586,6 +1793,12 @@@
static void nft_chain_release_hook(struct nft_chain_hook *hook) { + struct nft_hook *h, *next; + + list_for_each_entry_safe(h, next, &hook->list, list) { + list_del(&h->list); + kfree(h); + } module_put(hook->type->owner); }
@@@ -1823,49 -1610,6 +1823,49 @@@ static struct nft_rule **nf_tables_chai return kvmalloc(alloc, GFP_KERNEL); }
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family, + const struct nft_chain_hook *hook, + struct nft_chain *chain) +{ + ops->pf = family; + ops->hooknum = hook->num; + ops->priority = hook->priority; + ops->priv = chain; + ops->hook = hook->type->hooks[ops->hooknum]; +} + +static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, + struct nft_chain_hook *hook, u32 flags) +{ + struct nft_chain *chain; + struct nft_hook *h; + + basechain->type = hook->type; + INIT_LIST_HEAD(&basechain->hook_list); + chain = &basechain->chain; + + if (family == NFPROTO_NETDEV) { + list_splice_init(&hook->list, &basechain->hook_list); + list_for_each_entry(h, &basechain->hook_list, list) + nft_basechain_hook_init(&h->ops, family, hook, chain); + + basechain->ops.hooknum = hook->num; + basechain->ops.priority = hook->priority; + } else { + nft_basechain_hook_init(&basechain->ops, family, hook, chain); + } + + chain->flags |= NFT_BASE_CHAIN | flags; + basechain->policy = NF_ACCEPT; + if (chain->flags & NFT_CHAIN_HW_OFFLOAD && + nft_chain_offload_priority(basechain) < 0) + return -EOPNOTSUPP; + + flow_block_init(&basechain->flow_block); + + return 0; +} + static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, u8 policy, u32 flags) { @@@ -1884,6 -1628,7 +1884,6 @@@
if (nla[NFTA_CHAIN_HOOK]) { struct nft_chain_hook hook; - struct nf_hook_ops *ops;
err = nft_chain_parse_hook(net, nla, &hook, family, true); if (err < 0) @@@ -1894,7 -1639,9 +1894,7 @@@ nft_chain_release_hook(&hook); return -ENOMEM; } - - if (hook.dev != NULL) - strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ); + chain = &basechain->chain;
if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); @@@ -1907,12 -1654,24 +1907,12 @@@ static_branch_inc(&nft_counters_enabled); }
- basechain->type = hook.type; - chain = &basechain->chain; - - ops = &basechain->ops; - ops->pf = family; - ops->hooknum = hook.num; - ops->priority = hook.priority; - ops->priv = chain; - ops->hook = hook.type->hooks[ops->hooknum]; - ops->dev = hook.dev; - - chain->flags |= NFT_BASE_CHAIN | flags; - basechain->policy = NF_ACCEPT; - if (chain->flags & NFT_CHAIN_HW_OFFLOAD && - nft_chain_offload_priority(basechain) < 0) - return -EOPNOTSUPP; - - flow_block_init(&basechain->flow_block); + err = nft_basechain_init(basechain, family, &hook, flags); + if (err < 0) { + nft_chain_release_hook(&hook); + kfree(basechain); + return err; + } } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@@ -1972,25 -1731,6 +1972,25 @@@ err1 return err; }
+static bool nft_hook_list_equal(struct list_head *hook_list1, + struct list_head *hook_list2) +{ + struct nft_hook *hook; + int n = 0, m = 0; + + n = 0; + list_for_each_entry(hook, hook_list2, list) { + if (!nft_hook_list_find(hook_list1, hook)) + return false; + + n++; + } + list_for_each_entry(hook, hook_list1, list) + m++; + + return n == m; +} + static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, u32 flags) { @@@ -2022,19 -1762,12 +2022,19 @@@ return -EBUSY; }
- ops = &basechain->ops; - if (ops->hooknum != hook.num || - ops->priority != hook.priority || - ops->dev != hook.dev) { - nft_chain_release_hook(&hook); - return -EBUSY; + if (ctx->family == NFPROTO_NETDEV) { + if (!nft_hook_list_equal(&basechain->hook_list, + &hook.list)) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + } else { + ops = &basechain->ops; + if (ops->hooknum != hook.num || + ops->priority != hook.priority) { + nft_chain_release_hook(&hook); + return -EBUSY; + } } nft_chain_release_hook(&hook); } @@@ -2189,7 -1922,6 +2189,7 @@@ static int nf_tables_newchain(struct ne if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP;
+ flags |= chain->flags & NFT_BASE_CHAIN; return nf_tables_updchain(&ctx, genmask, policy, flags); }
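nft_hook_list_equal() above implements set equality in two steps: every hook in the new list must exist in the current one, and both lists must have the same length; that is sufficient because duplicate devices were already rejected when the attribute was parsed. Its use on the update path:

	/* netdev chains: device sets must match exactly, else -EBUSY */
	if (!nft_hook_list_equal(&basechain->hook_list, &hook.list)) {
		nft_chain_release_hook(&hook);
		return -EBUSY;
	}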
@@@ -5411,6 -5143,9 +5411,6 @@@ static int nf_tables_updobj(const struc struct nft_trans *trans; int err;
- if (!obj->ops->update) - return -EOPNOTSUPP; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) @@@ -5847,7 -5582,6 +5847,7 @@@ static const struct nla_policy nft_flow .len = NFT_NAME_MAXLEN - 1 }, [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, + [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 }, };
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, @@@ -5894,6 -5628,43 +5894,6 @@@ nft_flowtable_lookup_byhandle(const str return ERR_PTR(-ENOENT); }
-static int nf_tables_parse_devices(const struct nft_ctx *ctx, - const struct nlattr *attr, - struct net_device *dev_array[], int *len) -{ - const struct nlattr *tmp; - struct net_device *dev; - char ifname[IFNAMSIZ]; - int rem, n = 0, err; - - nla_for_each_nested(tmp, attr, rem) { - if (nla_type(tmp) != NFTA_DEVICE_NAME) { - err = -EINVAL; - goto err1; - } - - nla_strlcpy(ifname, tmp, IFNAMSIZ); - dev = __dev_get_by_name(ctx->net, ifname); - if (!dev) { - err = -ENOENT; - goto err1; - } - - dev_array[n++] = dev; - if (n == NFT_FLOWTABLE_DEVICE_MAX) { - err = -EFBIG; - goto err1; - } - } - if (!len) - return -EINVAL; - - err = 0; -err1: - *len = n; - return err; -} - static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = { [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 }, [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 }, @@@ -5904,10 -5675,11 +5904,10 @@@ static int nf_tables_flowtable_parse_ho const struct nlattr *attr, struct nft_flowtable *flowtable) { - struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX]; struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; - struct nf_hook_ops *ops; + struct nft_hook *hook; int hooknum, priority; - int err, n = 0, i; + int err;
err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, nft_flowtable_hook_policy, NULL); @@@ -5925,21 -5697,27 +5925,21 @@@
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
- err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS], - dev_array, &n); + err = nf_tables_parse_netdev_hooks(ctx->net, + tb[NFTA_FLOWTABLE_HOOK_DEVS], + &flowtable->hook_list); if (err < 0) return err;
- ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL); - if (!ops) - return -ENOMEM; - - flowtable->hooknum = hooknum; - flowtable->priority = priority; - flowtable->ops = ops; - flowtable->ops_len = n; + flowtable->hooknum = hooknum; + flowtable->data.priority = priority;
- for (i = 0; i < n; i++) { - flowtable->ops[i].pf = NFPROTO_NETDEV; - flowtable->ops[i].hooknum = hooknum; - flowtable->ops[i].priority = priority; - flowtable->ops[i].priv = &flowtable->data; - flowtable->ops[i].hook = flowtable->data.type->hook; - flowtable->ops[i].dev = dev_array[i]; + list_for_each_entry(hook, &flowtable->hook_list, list) { + hook->ops.pf = NFPROTO_NETDEV; + hook->ops.hooknum = hooknum; + hook->ops.priority = priority; + hook->ops.priv = &flowtable->data; + hook->ops.hook = flowtable->data.type->hook; }
return err; @@@ -5976,73 -5754,17 +5976,73 @@@ nft_flowtable_type_get(struct net *net return ERR_PTR(-ENOENT); }
+static void nft_unregister_flowtable_hook(struct net *net, + struct nft_flowtable *flowtable, + struct nft_hook *hook) +{ + nf_unregister_net_hook(net, &hook->ops); + flowtable->data.type->setup(&flowtable->data, hook->ops.dev, + FLOW_BLOCK_UNBIND); +} + static void nft_unregister_flowtable_net_hooks(struct net *net, struct nft_flowtable *flowtable) { - int i; + struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) { - if (!flowtable->ops[i].dev) - continue; + list_for_each_entry(hook, &flowtable->hook_list, list) + nft_unregister_flowtable_hook(net, flowtable, hook); +} + +static int nft_register_flowtable_net_hooks(struct net *net, + struct nft_table *table, + struct nft_flowtable *flowtable) +{ + struct nft_hook *hook, *hook2, *next; + struct nft_flowtable *ft; + int err, i = 0; + + list_for_each_entry(hook, &flowtable->hook_list, list) { + list_for_each_entry(ft, &table->flowtables, list) { + list_for_each_entry(hook2, &ft->hook_list, list) { + if (hook->ops.dev == hook2->ops.dev && + hook->ops.pf == hook2->ops.pf) { + err = -EBUSY; + goto err_unregister_net_hooks; + } + } + } + + err = flowtable->data.type->setup(&flowtable->data, + hook->ops.dev, + FLOW_BLOCK_BIND); + if (err < 0) + goto err_unregister_net_hooks; + + err = nf_register_net_hook(net, &hook->ops); + if (err < 0) { + flowtable->data.type->setup(&flowtable->data, + hook->ops.dev, + FLOW_BLOCK_UNBIND); + goto err_unregister_net_hooks; + } + + i++; + } + + return 0;
- nf_unregister_net_hook(net, &flowtable->ops[i]); +err_unregister_net_hooks: + list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { + if (i-- <= 0) + break; + + nft_unregister_flowtable_hook(net, flowtable, hook); + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); } + + return err; }
static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, @@@ -6053,13 -5775,12 +6053,13 @@@ { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nf_flowtable_type *type; - struct nft_flowtable *flowtable, *ft; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + struct nft_hook *hook, *next; struct nft_table *table; struct nft_ctx ctx; - int err, i, k; + int err;
if (!nla[NFTA_FLOWTABLE_TABLE] || !nla[NFTA_FLOWTABLE_NAME] || @@@ -6098,7 -5819,6 +6098,7 @@@
flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&flowtable->hook_list);
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL); if (!flowtable->name) { @@@ -6112,14 -5832,6 +6112,14 @@@ goto err2; }
+ if (nla[NFTA_FLOWTABLE_FLAGS]) { + flowtable->data.flags = + ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); + if (flowtable->data.flags & ~NF_FLOWTABLE_HW_OFFLOAD) + goto err3; + } + + write_pnet(&flowtable->data.net, net); flowtable->data.type = type; err = type->init(&flowtable->data); if (err < 0) @@@ -6130,24 -5842,43 +6130,24 @@@ if (err < 0) goto err4;
- for (i = 0; i < flowtable->ops_len; i++) { - if (!flowtable->ops[i].dev) - continue; - - list_for_each_entry(ft, &table->flowtables, list) { - for (k = 0; k < ft->ops_len; k++) { - if (!ft->ops[k].dev) - continue; - - if (flowtable->ops[i].dev == ft->ops[k].dev && - flowtable->ops[i].pf == ft->ops[k].pf) { - err = -EBUSY; - goto err5; - } - } - } - - err = nf_register_net_hook(net, &flowtable->ops[i]); - if (err < 0) - goto err5; - } + err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable); + if (err < 0) + goto err4;
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err6; + goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables); table->use++;
return 0; -err6: - i = flowtable->ops_len; err5: - for (k = i - 1; k >= 0; k--) - nf_unregister_net_hook(net, &flowtable->ops[k]); - - kfree(flowtable->ops); + list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { + nft_unregister_flowtable_hook(net, flowtable, hook); + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); + } err4: flowtable->data.type->free(&flowtable->data); err3: @@@ -6214,8 -5945,8 +6214,8 @@@ static int nf_tables_fill_flowtable_inf { struct nlattr *nest, *nest_devs; struct nfgenmsg *nfmsg; + struct nft_hook *hook; struct nlmsghdr *nlh; - int i;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); @@@ -6231,23 -5962,25 +6231,23 @@@ nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), - NFTA_FLOWTABLE_PAD)) + NFTA_FLOWTABLE_PAD) || + nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) goto nla_put_failure;
nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK); if (!nest) goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || - nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) + nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority))) goto nla_put_failure;
nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS); if (!nest_devs) goto nla_put_failure;
- for (i = 0; i < flowtable->ops_len; i++) { - const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev); - - if (dev && - nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) + list_for_each_entry_rcu(hook, &flowtable->hook_list, list) { + if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name)) goto nla_put_failure; } nla_nest_end(skb, nest_devs); @@@ -6438,12 -6171,7 +6438,12 @@@ err
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { - kfree(flowtable->ops); + struct nft_hook *hook, *next; + + list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { + list_del_rcu(&hook->list); + kfree(hook); + } kfree(flowtable->name); flowtable->data.type->free(&flowtable->data); module_put(flowtable->data.type->owner); @@@ -6483,15 -6211,14 +6483,15 @@@ nla_put_failure static void nft_flowtable_event(unsigned long event, struct net_device *dev, struct nft_flowtable *flowtable) { - int i; + struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) { - if (flowtable->ops[i].dev != dev) + list_for_each_entry(hook, &flowtable->hook_list, list) { + if (hook->ops.dev != dev) continue;
- nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]); - flowtable->ops[i].dev = NULL; + nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook); + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); break; } } @@@ -6772,8 -6499,7 +6772,8 @@@ static void nft_obj_commit_update(struc obj = nft_trans_obj(trans); newobj = nft_trans_obj_newobj(trans);
-	obj->ops->update(obj, newobj);
+	if (obj->ops->update)
+		obj->ops->update(obj, newobj);
kfree(newobj); } @@@ -7595,7 -7321,7 +7595,7 @@@ int nft_validate_register_load(enum nft return -EINVAL; if (len == 0) return -EINVAL; - if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data)) + if (reg * NFT_REG32_SIZE + len > sizeof_member(struct nft_regs, data)) return -ERANGE;
return 0; @@@ -7643,7 -7369,7 +7643,7 @@@ int nft_validate_register_store(const s if (len == 0) return -EINVAL; if (reg * NFT_REG32_SIZE + len > - FIELD_SIZEOF(struct nft_regs, data)) + sizeof_member(struct nft_regs, data)) return -ERANGE;
if (data != NULL && type != NFT_DATA_VALUE) diff --combined net/openvswitch/datapath.c index 1047e8043084,4c412b2b27b4..17174db54a27 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@@ -227,8 -227,7 +227,8 @@@ void ovs_dp_process_packet(struct sk_bu stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */ - flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), + &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall;
@@@ -350,8 -349,7 +350,8 @@@ static size_t upcall_msg_size(const str size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ - + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */ + + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */ + + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */ if (upcall_info->userdata) @@@ -394,7 -392,6 +394,7 @@@ static int queue_userspace_packet(struc size_t len; unsigned int hlen; int err, dp_ifindex; + u64 hash;
dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) @@@ -487,30 -484,23 +487,30 @@@ }
/* Add OVS_PACKET_ATTR_MRU */ - if (upcall_info->mru) { - if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, - upcall_info->mru)) { - err = -ENOBUFS; - goto out; - } - pad_packet(dp, user_skb); + if (upcall_info->mru && + nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) { + err = -ENOBUFS; + goto out; }
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */ - if (cutlen > 0) { - if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, - skb->len)) { - err = -ENOBUFS; - goto out; - } - pad_packet(dp, user_skb); + if (cutlen > 0 && + nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) { + err = -ENOBUFS; + goto out; + } + + /* Add OVS_PACKET_ATTR_HASH */ + hash = skb_get_hash_raw(skb); + if (skb->sw_hash) + hash |= OVS_PACKET_HASH_SW_BIT; + + if (skb->l4_hash) + hash |= OVS_PACKET_HASH_L4_BIT; + + if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { + err = -ENOBUFS; + goto out; }
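OVS_PACKET_ATTR_HASH packs the 32-bit skb hash into the low word of a u64 and carries the sw_hash/l4_hash booleans as flag bits above it (the exact bit positions live in the openvswitch uapi header, e.g. bits 32 and 33). Pack and unpack, as done here and in ovs_packet_cmd_execute() below:

	u64 hash = skb_get_hash_raw(skb);	/* low 32 bits: the hash */

	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;
	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	/* receive side reverses it */
	__skb_set_hash(skb, hash & 0xFFFFFFFFULL,
		       !!(hash & OVS_PACKET_HASH_SW_BIT),
		       !!(hash & OVS_PACKET_HASH_L4_BIT));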
/* Only reserve room for attribute header, packet data is added @@@ -552,7 -542,6 +552,7 @@@ static int ovs_packet_cmd_execute(struc struct datapath *dp; struct vport *input_vport; u16 mru = 0; + u64 hash; int len; int err; bool log = !a[OVS_PACKET_ATTR_PROBE]; @@@ -578,14 -567,6 +578,14 @@@ } OVS_CB(packet)->mru = mru;
+ if (a[OVS_PACKET_ATTR_HASH]) { + hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]); + + __skb_set_hash(packet, hash & 0xFFFFFFFFULL, + !!(hash & OVS_PACKET_HASH_SW_BIT), + !!(hash & OVS_PACKET_HASH_L4_BIT)); + } + /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); @@@ -723,13 -704,9 +723,13 @@@ static size_t ovs_flow_cmd_msg_size(con { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
- /* OVS_FLOW_ATTR_UFID */ + /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback + * see ovs_nla_put_identifier() + */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); + else + len += nla_total_size(ovs_key_attr_size());
/* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) @@@ -905,10 -882,7 +905,10 @@@ static struct sk_buff *ovs_flow_cmd_bui retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); - BUG_ON(retval < 0); + if (WARN_ON_ONCE(retval < 0)) { + kfree_skb(skb); + skb = ERR_PTR(retval); + } return skb; }
@@@ -1372,10 -1346,7 +1372,10 @@@ static int ovs_flow_cmd_del(struct sk_b OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); - BUG_ON(err < 0); + if (WARN_ON_ONCE(err < 0)) { + kfree_skb(reply); + goto out_free; + }
ovs_notify(&dp_flow_genl_family, reply, info); } else { @@@ -1383,7 -1354,6 +1383,7 @@@ } }
+out_free: ovs_flow_free(flow, true); return 0; unlock: @@@ -1605,31 -1575,6 +1605,31 @@@ static int ovs_dp_change(struct datapat return 0; }
+static int ovs_dp_stats_init(struct datapath *dp) +{ + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); + if (!dp->stats_percpu) + return -ENOMEM; + + return 0; +} + +static int ovs_dp_vport_init(struct datapath *dp) +{ + int i; + + dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) + return -ENOMEM; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + + return 0; +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@@ -1638,7 -1583,7 +1638,7 @@@ struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err, i; + int err;
err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@@ -1651,26 -1596,35 +1651,26 @@@ err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_free_reply; + goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); if (err) - goto err_free_dp; + goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); - if (!dp->stats_percpu) { - err = -ENOMEM; + err = ovs_dp_stats_init(dp); + if (err) goto err_destroy_table; - }
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, - sizeof(struct hlist_head), - GFP_KERNEL); - if (!dp->ports) { - err = -ENOMEM; - goto err_destroy_percpu; - } - - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_dp_vport_init(dp); + if (err) + goto err_destroy_stats;
err = ovs_meters_init(dp); if (err) - goto err_destroy_ports_array; + goto err_destroy_ports;
/* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); @@@ -1702,7 -1656,6 +1702,7 @@@ ovs_dp_reset_user_features(skb, info); }
+ ovs_unlock(); goto err_destroy_meters; }
@@@ -1719,16 -1672,17 +1719,16 @@@ return 0;
err_destroy_meters: - ovs_unlock(); ovs_meters_exit(dp); -err_destroy_ports_array: +err_destroy_ports: kfree(dp->ports); -err_destroy_percpu: +err_destroy_stats: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(&dp->table); -err_free_dp: +err_destroy_dp: kfree(dp); -err_free_reply: +err_destroy_reply: kfree_skb(reply); err: return err; @@@ -1927,7 -1881,7 +1927,7 @@@ static struct genl_family dp_datapath_g /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, struct net *net, u32 portid, u32 seq, - u32 flags, u8 cmd) + u32 flags, u8 cmd, gfp_t gfp) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@@ -1948,7 -1902,7 +1948,7 @@@ goto nla_put_failure;
if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev)); + int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) goto nla_put_failure; @@@ -1989,12 -1943,11 +1989,12 @@@ struct sk_buff *ovs_vport_cmd_build_inf struct sk_buff *skb; int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd, + GFP_KERNEL); BUG_ON(retval < 0);
return skb; @@@ -2136,7 -2089,7 +2136,7 @@@ restart
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_NEW); + OVS_VPORT_CMD_NEW, GFP_KERNEL);
new_headroom = netdev_get_fwd_headroom(vport->dev);
@@@ -2197,7 -2150,7 +2197,7 @@@ static int ovs_vport_cmd_set(struct sk_
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_SET); + OVS_VPORT_CMD_SET, GFP_KERNEL); BUG_ON(err < 0);
ovs_unlock(); @@@ -2237,7 -2190,7 +2237,7 @@@ static int ovs_vport_cmd_del(struct sk_
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_DEL); + OVS_VPORT_CMD_DEL, GFP_KERNEL); BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */ @@@ -2284,7 -2237,7 +2284,7 @@@ static int ovs_vport_cmd_get(struct sk_ goto exit_unlock_free; err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_GET); + OVS_VPORT_CMD_GET, GFP_ATOMIC); BUG_ON(err < 0); rcu_read_unlock();
@@@ -2320,8 -2273,7 +2320,8 @@@ static int ovs_vport_cmd_dump(struct sk NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_GET) < 0) + OVS_VPORT_CMD_GET, + GFP_ATOMIC) < 0) goto out;
j++; @@@ -2497,7 -2449,7 +2497,7 @@@ static int __init dp_init(void { int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_member(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
diff --combined net/openvswitch/flow.h index fd8ed766bdd1,c9987cd5e03c..23cb757b11b5 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@@ -30,14 -30,13 +30,14 @@@ enum sw_flow_mac_proto MAC_PROTO_ETHERNET, }; #define SW_FLOW_KEY_INVALID 0x80 +#define MPLS_LABEL_DEPTH 3
 /* Store options at the end of the array if they are less than the
  * maximum size. This allows us to get the benefits of variable length
  * matching for small options. */
 #define TUN_METADATA_OFFSET(opt_len)			\
-	(FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+	(sizeof_member(struct sw_flow_key, tun_opts) - opt_len)
 #define TUN_METADATA_OPTS(flow_key, opt_len)		\
 	((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
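sizeof_member() yields the size of a struct field without needing an instance, so TUN_METADATA_OFFSET() can anchor a short option block at the tail of the fixed-size tun_opts array. A small usage sketch (the opt_len value is hypothetical):

	u8 opt_len = 8;
	void *opts = TUN_METADATA_OPTS(&key, opt_len);

	/* opts points at the final opt_len bytes of key.tun_opts, so a
	 * short option block always sits at a fixed suffix position and
	 * can be compared without variable-length offsets */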
@@@ -52,7 -51,7 +52,7 @@@ struct vlan_head
#define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ - FIELD_SIZEOF(struct sw_flow_key, recirc_id)) + sizeof_member(struct sw_flow_key, recirc_id))
struct ovs_key_nsh { struct ovs_nsh_key_base base; @@@ -85,6 -84,9 +85,6 @@@ struct sw_flow_key * protocol. */ union { - struct { - __be32 top_lse; /* top label stack entry */ - } mpls; struct { u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ u8 tos; /* IP ToS. */ @@@ -133,11 -135,6 +133,11 @@@ } nd; }; } ipv6; + struct { + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ + } mpls; + struct ovs_key_nsh nsh; /* network service header */ }; struct { @@@ -169,6 -166,7 +169,6 @@@ struct sw_flow_key_range struct sw_flow_mask { int ref_count; struct rcu_head rcu; - struct list_head list; struct sw_flow_key_range range; struct sw_flow_key key; }; diff --combined net/sched/act_ct.c index bf2d69335d4b,3ee22bbac3ff..86a978581450 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@@ -312,7 -312,7 +312,7 @@@ static void tcf_ct_act_set_labels(struc u32 *labels_m) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) - size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels); + size_t labels_sz = sizeof_member(struct tcf_ct_params, labels);
if (!memchr_inv(labels_m, 0, labels_sz)) return; @@@ -329,7 -329,6 +329,7 @@@ static int tcf_ct_act_nat(struct sk_buf bool commit) { #if IS_ENABLED(CONFIG_NF_NAT) + int err; enum nf_nat_manip_type maniptype;
if (!(ct_action & TCA_CT_ACT_NAT)) @@@ -360,17 -359,7 +360,17 @@@ return NF_ACCEPT; }
- return ct_nat_execute(skb, ct, ctinfo, range, maniptype); + err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); + if (err == NF_ACCEPT && + ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); + } + return err; #else return NF_ACCEPT; #endif @@@ -476,15 -465,16 +476,15 @@@ out_push skb_push_rcsum(skb, nh_ofs);
out: - bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); + tcf_action_update_bstats(&c->common, skb); return retval;
drop: - qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); + tcf_action_inc_drop_qstats(&c->common); return TC_ACT_SHOT; }
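The tcf_ct_act_nat() change earlier in this file's diff handles conntrack entries that have both source and destination NAT configured: after the first ct_nat_execute() pass succeeds, the manip type is flipped and a second pass applies the other translation. In outline:

	err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
	if (err == NF_ACCEPT &&
	    ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
		/* both translations configured: run the opposite manip */
		maniptype = (maniptype == NF_NAT_MANIP_SRC) ?
			    NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
		err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
	}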
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { - [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 }, [TCA_CT_ACTION] = { .type = NLA_U16 }, [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, [TCA_CT_ZONE] = { .type = NLA_U16 }, @@@ -666,7 -656,7 +666,7 @@@ static int tcf_ct_fill_params(struct ne static int tcf_ct_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int replace, int bind, bool rtnl_held, - struct tcf_proto *tp, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ct_net_id); @@@ -698,8 -688,8 +698,8 @@@ return err;
if (!err) { - err = tcf_idr_create(tn, index, est, a, - &act_ct_ops, bind, true); + err = tcf_idr_create_from_flags(tn, index, est, a, + &act_ct_ops, bind, flags); if (err) { tcf_idr_cleanup(tn, index); return err; @@@ -732,8 -722,7 +732,8 @@@
spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); - rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock)); + params = rcu_replace_pointer(c->params, params, + lockdep_is_held(&c->tcf_lock)); spin_unlock_bh(&c->tcf_lock);
if (goto_ch) @@@ -916,7 -905,11 +916,7 @@@ static void tcf_stats_update(struct tc_ { struct tcf_ct *c = to_ct(a);
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - - if (hw) - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), - bytes, packets); + tcf_action_update_stats(a, bytes, packets, false, hw); c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); }
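Both this file and nf_tables_api.c above move from rcu_swap_protected() to rcu_replace_pointer(), which publishes the new pointer and returns the old one instead of swapping a local variable in place. Shape of the conversion:

	/* was: rcu_swap_protected(c->params, params, lockdep_is_held(...)); */
	params = rcu_replace_pointer(c->params, params,
				     lockdep_is_held(&c->tcf_lock));
	/* 'params' now holds the previous value, to be released after an
	 * RCU grace period (e.g. via call_rcu()) */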
@@@ -936,7 -929,7 +936,7 @@@ static struct tc_action_ops act_ct_ops
static __net_init int ct_init_net(struct net *net) { - unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8; + unsigned int n_bits = sizeof_member(struct tcf_ct_params, labels) * 8; struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
if (nf_connlabels_get(net, n_bits - 1)) { diff --combined net/sched/cls_flower.c index 6c68971d99df,4e8a3377ac4e..4612fef48eb5 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@@ -22,8 -22,6 +22,8 @@@ #include <net/ip.h> #include <net/flow_dissector.h> #include <net/geneve.h> +#include <net/vxlan.h> +#include <net/erspan.h>
#include <net/dst.h> #include <net/dst_metadata.h> @@@ -56,13 -54,8 +56,13 @@@ struct fl_flow_key struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; struct flow_dissector_key_enc_opts enc_opts; - struct flow_dissector_key_ports tp_min; - struct flow_dissector_key_ports tp_max; + union { + struct flow_dissector_key_ports tp; + struct { + struct flow_dissector_key_ports tp_min; + struct flow_dissector_key_ports tp_max; + }; + } tp_range; struct flow_dissector_key_ct ct; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
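The fl_flow_key change above wraps the transport ports in an anonymous union so the same leading bytes can be viewed either as the single flow_dissector_key_ports that the dissector fills in (tp) or as the min half of the min/max pair used by port-range filters; tp_max occupies the second half. In miniature:

	union {
		struct flow_dissector_key_ports tp;	/* exact-match view,
							 * aliases tp_min */
		struct {
			struct flow_dissector_key_ports tp_min;
			struct flow_dissector_key_ports tp_max;
		};
	} tp_range;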
@@@ -205,19 -198,19 +205,19 @@@ static bool fl_range_port_dst_cmp(struc { __be16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_min.dst); - max_mask = htons(filter->mask->key.tp_max.dst); - min_val = htons(filter->key.tp_min.dst); - max_val = htons(filter->key.tp_max.dst); + min_mask = htons(filter->mask->key.tp_range.tp_min.dst); + max_mask = htons(filter->mask->key.tp_range.tp_max.dst); + min_val = htons(filter->key.tp_range.tp_min.dst); + max_val = htons(filter->key.tp_range.tp_max.dst);
if (min_mask && max_mask) { - if (htons(key->tp.dst) < min_val || - htons(key->tp.dst) > max_val) + if (htons(key->tp_range.tp.dst) < min_val || + htons(key->tp_range.tp.dst) > max_val) return false;
/* skb does not have min and max values */ - mkey->tp_min.dst = filter->mkey.tp_min.dst; - mkey->tp_max.dst = filter->mkey.tp_max.dst; + mkey->tp_range.tp_min.dst = filter->mkey.tp_range.tp_min.dst; + mkey->tp_range.tp_max.dst = filter->mkey.tp_range.tp_max.dst; } return true; } @@@ -228,19 -221,19 +228,19 @@@ static bool fl_range_port_src_cmp(struc { __be16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_min.src); - max_mask = htons(filter->mask->key.tp_max.src); - min_val = htons(filter->key.tp_min.src); - max_val = htons(filter->key.tp_max.src); + min_mask = htons(filter->mask->key.tp_range.tp_min.src); + max_mask = htons(filter->mask->key.tp_range.tp_max.src); + min_val = htons(filter->key.tp_range.tp_min.src); + max_val = htons(filter->key.tp_range.tp_max.src);
if (min_mask && max_mask) { - if (htons(key->tp.src) < min_val || - htons(key->tp.src) > max_val) + if (htons(key->tp_range.tp.src) < min_val || + htons(key->tp_range.tp.src) > max_val) return false;
/* skb does not have min and max values */ - mkey->tp_min.src = filter->mkey.tp_min.src; - mkey->tp_max.src = filter->mkey.tp_max.src; + mkey->tp_range.tp_min.src = filter->mkey.tp_range.tp_min.src; + mkey->tp_range.tp_max.src = filter->mkey.tp_range.tp_max.src; } return true; } @@@ -695,11 -688,7 +695,11 @@@ static const struct nla_policy fl_polic
 static const struct nla_policy
 enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+	[TCA_FLOWER_KEY_ENC_OPTS_UNSPEC]	= {
+		.strict_start_type = TCA_FLOWER_KEY_ENC_OPTS_VXLAN },
 	[TCA_FLOWER_KEY_ENC_OPTS_GENEVE]	= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_ENC_OPTS_VXLAN]		= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_ENC_OPTS_ERSPAN]	= { .type = NLA_NESTED },
 };
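The new UNSPEC entry uses .strict_start_type so that attributes from TCA_FLOWER_KEY_ENC_OPTS_VXLAN onward are validated strictly even when a message arrives through the older, lenient parsing paths, while the pre-existing GENEVE attribute keeps its old behaviour. The mechanism in miniature, with a hypothetical policy:

  enum {
          MY_OPT_UNSPEC,
          MY_OPT_OLD,     /* pre-existing, leniently validated */
          MY_OPT_NEW,     /* new, strictly validated everywhere */
          __MY_OPT_MAX
  };
  #define MY_OPT_MAX (__MY_OPT_MAX - 1)

  static const struct nla_policy my_opt_policy[MY_OPT_MAX + 1] = {
          [MY_OPT_UNSPEC] = { .strict_start_type = MY_OPT_NEW },
          [MY_OPT_OLD]    = { .type = NLA_U32 },
          [MY_OPT_NEW]    = { .type = NLA_NESTED },
  };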
static const struct nla_policy @@@ -710,19 -699,6 +710,19 @@@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OP .len = 128 }, };
+static const struct nla_policy +vxlan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, +}; + static void fl_set_key_val(struct nlattr **tb, void *val, int val_type, void *mask, int mask_type, int len) @@@ -739,25 -715,23 +739,25 @@@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { - fl_set_key_val(tb, &key->tp_min.dst, - TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst, - TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst)); - fl_set_key_val(tb, &key->tp_max.dst, - TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst, - TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst)); - fl_set_key_val(tb, &key->tp_min.src, - TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src, - TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src)); - fl_set_key_val(tb, &key->tp_max.src, - TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src, - TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src)); - - if ((mask->tp_min.dst && mask->tp_max.dst && - htons(key->tp_max.dst) <= htons(key->tp_min.dst)) || - (mask->tp_min.src && mask->tp_max.src && - htons(key->tp_max.src) <= htons(key->tp_min.src))) + fl_set_key_val(tb, &key->tp_range.tp_min.dst, + TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_range.tp_min.dst, + TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.dst)); + fl_set_key_val(tb, &key->tp_range.tp_max.dst, + TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_range.tp_max.dst, + TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.dst)); + fl_set_key_val(tb, &key->tp_range.tp_min.src, + TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_range.tp_min.src, + TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.src)); + fl_set_key_val(tb, &key->tp_range.tp_max.src, + TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src, + TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src)); + + if ((mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && + htons(key->tp_range.tp_max.dst) <= + htons(key->tp_range.tp_min.dst)) || + (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src && + htons(key->tp_range.tp_max.src) <= + htons(key->tp_range.tp_min.src))) return -EINVAL;
return 0; @@@ -954,105 -928,6 +954,105 @@@ static int fl_set_geneve_opt(const stru return sizeof(struct geneve_opt) + data_len; }
+static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1]; + struct vxlan_metadata *md; + int err; + + md = (struct vxlan_metadata *)&key->enc_opts.data[key->enc_opts.len]; + memset(md, 0xff, sizeof(*md)); + + if (!depth) + return sizeof(*md); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_VXLAN) { + NL_SET_ERR_MSG(extack, "Non-vxlan option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX, nla, + vxlan_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) + md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]); + + return sizeof(*md); +} + +static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1]; + struct erspan_metadata *md; + int err; + + md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len]; + memset(md, 0xff, sizeof(*md)); + md->version = 1; + + if (!depth) + return sizeof(*md); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_ERSPAN) { + NL_SET_ERR_MSG(extack, "Non-erspan option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX, nla, + erspan_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) + md->version = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]); + + if (md->version == 1) { + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index"); + return -EINVAL; + } + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { + nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]; + md->u.index = nla_get_be32(nla); + } + } else if (md->version == 2) { + if (!option_len && (!tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] || + !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID])) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid"); + return -EINVAL; + } + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) { + nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]; + md->u.md2.dir = nla_get_u8(nla); + } + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) { + nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]; + set_hwid(&md->u.md2, nla_get_u8(nla)); + } + } else { + NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect"); + return -EINVAL; + } + + return sizeof(*md); +} + static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@@ -1083,11 -958,6 +1083,11 @@@ nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { switch (nla_type(nla_opt_key)) { case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: + if (key->enc_opts.dst_opt_type && + key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); + return -EINVAL; + } option_len = 0; key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; option_len = fl_set_geneve_opt(nla_opt_key, key, @@@ -1113,72 -983,6 +1113,72 @@@ return -EINVAL; }
+		if (msk_depth)
+			nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+		break;
+	case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
+		if (key->enc_opts.dst_opt_type) {
+			NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options");
+			return -EINVAL;
+		}
+		option_len = 0;
+		key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+		option_len = fl_set_vxlan_opt(nla_opt_key, key,
+					      key_depth, option_len,
+					      extack);
+		if (option_len < 0)
+			return option_len;
+
+		key->enc_opts.len += option_len;
+		/* At the same time we need to parse through the mask
+		 * in order to verify exact and mask attribute lengths.
+		 */
+		mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+		option_len = fl_set_vxlan_opt(nla_opt_msk, mask,
+					      msk_depth, option_len,
+					      extack);
+		if (option_len < 0)
+			return option_len;
+
+		mask->enc_opts.len += option_len;
+		if (key->enc_opts.len != mask->enc_opts.len) {
+			NL_SET_ERR_MSG(extack, "Key and mask misaligned");
+			return -EINVAL;
+		}
+
+		if (msk_depth)
+			nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+		break;
+	case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
+		if (key->enc_opts.dst_opt_type) {
+			NL_SET_ERR_MSG(extack, "Duplicate type for erspan options");
+			return -EINVAL;
+		}
+		option_len = 0;
+		key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+		option_len = fl_set_erspan_opt(nla_opt_key, key,
+					       key_depth, option_len,
+					       extack);
+		if (option_len < 0)
+			return option_len;
+
+		key->enc_opts.len += option_len;
+		/* At the same time we need to parse through the mask
+		 * in order to verify exact and mask attribute lengths.
+		 */
+		mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+		option_len = fl_set_erspan_opt(nla_opt_msk, mask,
+					       msk_depth, option_len,
+					       extack);
+		if (option_len < 0)
+			return option_len;
+
+		mask->enc_opts.len += option_len;
+		if (key->enc_opts.len != mask->enc_opts.len) {
+			NL_SET_ERR_MSG(extack, "Key and mask misaligned");
+			return -EINVAL;
+		}
+
 		if (msk_depth)
 			nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 		break;

@@@ -1481,7 -1285,7 +1481,7 @@@ static int fl_init_mask_hashtable(struc
 }
 #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
-#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) sizeof_member(struct fl_flow_key, member)
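sizeof_member() (the FIELD_SIZEOF() replacement applied throughout this merge) yields the size of a struct member as a compile-time constant without requiring an instance, so it composes with BUILD_BUG_ON() and array sizing. A small usage sketch with a hypothetical struct:

  struct sample {
          u8 flags;
          u32 data[4];
  };

  static void sample_check(void)
  {
          /* compile-time size of a member, no instance required */
          BUILD_BUG_ON(sizeof_member(struct sample, data) !=
                       4 * sizeof(u32));
  }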
#define FL_KEY_IS_MASKED(mask, member) \ memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \ @@@ -1516,10 -1320,9 +1516,10 @@@ static void fl_init_dissector(struct fl FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); - if (FL_KEY_IS_MASKED(mask, tp) || - FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max)) - FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS_RANGE, tp_range); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IP, ip); FL_KEY_SET_IF_MASKED(mask, keys, cnt, @@@ -1568,10 -1371,8 +1568,10 @@@ static struct fl_flow_mask *fl_create_n
fl_mask_copy(newmask, mask);
- if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) || - (newmask->key.tp_min.src && newmask->key.tp_max.src)) + if ((newmask->key.tp_range.tp_min.dst && + newmask->key.tp_range.tp_max.dst) || + (newmask->key.tp_range.tp_min.src && + newmask->key.tp_range.tp_max.src)) newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;
err = fl_init_mask_hashtable(newmask); @@@ -2169,22 -1970,18 +2169,22 @@@ static int fl_dump_key_val(struct sk_bu static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key, struct fl_flow_key *mask) { - if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN, - &mask->tp_min.dst, TCA_FLOWER_UNSPEC, - sizeof(key->tp_min.dst)) || - fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX, - &mask->tp_max.dst, TCA_FLOWER_UNSPEC, - sizeof(key->tp_max.dst)) || - fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN, - &mask->tp_min.src, TCA_FLOWER_UNSPEC, - sizeof(key->tp_min.src)) || - fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX, - &mask->tp_max.src, TCA_FLOWER_UNSPEC, - sizeof(key->tp_max.src))) + if (fl_dump_key_val(skb, &key->tp_range.tp_min.dst, + TCA_FLOWER_KEY_PORT_DST_MIN, + &mask->tp_range.tp_min.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp_range.tp_min.dst)) || + fl_dump_key_val(skb, &key->tp_range.tp_max.dst, + TCA_FLOWER_KEY_PORT_DST_MAX, + &mask->tp_range.tp_max.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp_range.tp_max.dst)) || + fl_dump_key_val(skb, &key->tp_range.tp_min.src, + TCA_FLOWER_KEY_PORT_SRC_MIN, + &mask->tp_range.tp_min.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp_range.tp_min.src)) || + fl_dump_key_val(skb, &key->tp_range.tp_max.src, + TCA_FLOWER_KEY_PORT_SRC_MAX, + &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp_range.tp_max.src))) return -1;
return 0; @@@ -2338,61 -2135,6 +2338,61 @@@ nla_put_failure return -EMSGSIZE; }
+static int fl_dump_key_vxlan_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct vxlan_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_VXLAN); + if (!nest) + goto nla_put_failure; + + md = (struct vxlan_metadata *)&enc_opts->data[0]; + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, md->gbp)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_erspan_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct erspan_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_ERSPAN); + if (!nest) + goto nla_put_failure; + + md = (struct erspan_metadata *)&enc_opts->data[0]; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, md->version)) + goto nla_put_failure; + + if (md->version == 1 && + nla_put_be32(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index)) + goto nla_put_failure; + + if (md->version == 2 && + (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR, + md->u.md2.dir) || + nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID, + get_hwid(&md->u.md2)))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fl_dump_key_ct(struct sk_buff *skb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask) @@@ -2446,16 -2188,6 +2446,16 @@@ static int fl_dump_key_options(struct s if (err) goto nla_put_failure; break; + case TUNNEL_VXLAN_OPT: + err = fl_dump_key_vxlan_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; + case TUNNEL_ERSPAN_OPT: + err = fl_dump_key_erspan_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; default: goto nla_put_failure; } diff --combined net/sctp/socket.c index 0b485952a71c,4c3105d96dce..140a13cfc93b --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -384,7 -384,7 +384,7 @@@ static int sctp_do_bind(struct sock *sk } }
-	if (snum && snum < inet_prot_sock(net) &&
+	if (snum && inet_port_requires_bind_service(net, snum) &&
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		return -EACCES;
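inet_port_requires_bind_service() replaces the open-coded comparison against inet_prot_sock(); it answers the same question (does binding this port require CAP_NET_BIND_SERVICE in this netns?) against the per-netns privileged-port threshold. The intended call pattern, reduced from the hunk above into a hypothetical helper:

  static int may_bind_port(struct sock *sk, unsigned short snum)
  {
          struct net *net = sock_net(sk);

          /* ports below net.ipv4.ip_unprivileged_port_start need the cap */
          if (snum && inet_port_requires_bind_service(net, snum) &&
              !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                  return -EACCES;

          return 0;
  }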
@@@ -1061,7 -1061,7 +1061,7 @@@ static int sctp_connect_new_asoc(struc if (sctp_autobind(sk)) return -EAGAIN; } else { - if (ep->base.bind_addr.port < inet_prot_sock(net) && + if (inet_port_requires_bind_service(net, ep->base.bind_addr.port) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; } @@@ -3943,22 -3943,18 +3943,22 @@@ static int sctp_setsockopt_auto_asconf( */ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, char __user *optval, - unsigned int optlen) + unsigned int optlen, bool v2) { - struct sctp_paddrthlds val; + struct sctp_paddrthlds_v2 val; struct sctp_transport *trans; struct sctp_association *asoc; + int len;
- if (optlen < sizeof(struct sctp_paddrthlds)) + len = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds); + if (optlen < len) return -EINVAL; - if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, - sizeof(struct sctp_paddrthlds))) + if (copy_from_user(&val, optval, len)) return -EFAULT;
+ if (v2 && val.spt_pathpfthld > val.spt_pathcpthld) + return -EINVAL; + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { trans = sctp_addr_id2transport(sk, &val.spt_address, val.spt_assoc_id); @@@ -3967,8 -3963,6 +3967,8 @@@
if (val.spt_pathmaxrxt) trans->pathmaxrxt = val.spt_pathmaxrxt; + if (v2) + trans->ps_retrans = val.spt_pathcpthld; trans->pf_retrans = val.spt_pathpfthld;
return 0; @@@ -3984,23 -3978,17 +3984,23 @@@ transports) { if (val.spt_pathmaxrxt) trans->pathmaxrxt = val.spt_pathmaxrxt; + if (v2) + trans->ps_retrans = val.spt_pathcpthld; trans->pf_retrans = val.spt_pathpfthld; }
if (val.spt_pathmaxrxt) asoc->pathmaxrxt = val.spt_pathmaxrxt; + if (v2) + asoc->ps_retrans = val.spt_pathcpthld; asoc->pf_retrans = val.spt_pathpfthld; } else { struct sctp_sock *sp = sctp_sk(sk);
if (val.spt_pathmaxrxt) sp->pathmaxrxt = val.spt_pathmaxrxt; + if (v2) + sp->ps_retrans = val.spt_pathcpthld; sp->pf_retrans = val.spt_pathpfthld; }
@@@ -4601,40 -4589,6 +4601,40 @@@ out
 	return retval;
 }
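The _v2 plumbing above extends an existing socket option by copying only as many bytes as the caller's structure version actually carries. The shape of that pattern in isolation (hypothetical option structs, assuming the v2 layout only appends fields):

  struct opt_v1 { __u16 maxrxt; __u16 pfthld; };
  struct opt_v2 { __u16 maxrxt; __u16 pfthld; __u16 cpthld; };

  static int set_opt(char __user *optval, unsigned int optlen, bool v2)
  {
          struct opt_v2 val;      /* large enough for either version */
          int len = v2 ? sizeof(struct opt_v2) : sizeof(struct opt_v1);

          if (optlen < len)
                  return -EINVAL;
          if (copy_from_user(&val, optval, len))
                  return -EFAULT;

          /* val.cpthld holds valid data only when v2 is true */
          return 0;
  }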
+static int sctp_setsockopt_pf_expose(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_value > SCTP_PF_EXPOSE_MAX) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + if (asoc) + asoc->pf_expose = params.assoc_value; + else + sctp_sk(sk)->pf_expose = params.assoc_value; + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@@ -4790,12 -4744,7 +4790,12 @@@ static int sctp_setsockopt(struct sock retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; case SCTP_PEER_ADDR_THLDS: - retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen); + retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen, + false); + break; + case SCTP_PEER_ADDR_THLDS_V2: + retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen, + true); break; case SCTP_RECVRCVINFO: retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen); @@@ -4849,9 -4798,6 +4849,9 @@@ case SCTP_ECN_SUPPORTED: retval = sctp_setsockopt_ecn_supported(sk, optval, optlen); break; + case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: + retval = sctp_setsockopt_pf_expose(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@@ -5095,8 -5041,6 +5095,8 @@@ static int sctp_init_sock(struct sock * sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pf_retrans = net->sctp.pf_retrans; + sp->ps_retrans = net->sctp.ps_retrans; + sp->pf_expose = net->sctp.pf_expose; sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; @@@ -5577,16 -5521,8 +5577,16 @@@ static int sctp_getsockopt_peer_addr_in
transport = sctp_addr_id2transport(sk, &pinfo.spinfo_address, pinfo.spinfo_assoc_id); - if (!transport) - return -EINVAL; + if (!transport) { + retval = -EINVAL; + goto out; + } + + if (transport->state == SCTP_PF && + transport->asoc->pf_expose == SCTP_PF_EXPOSE_DISABLE) { + retval = -EACCES; + goto out; + }
pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc); pinfo.spinfo_state = transport->state; @@@ -7234,19 -7170,18 +7234,19 @@@ static int sctp_getsockopt_assoc_ids(st * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt */ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, - char __user *optval, - int len, - int __user *optlen) + char __user *optval, int len, + int __user *optlen, bool v2) { - struct sctp_paddrthlds val; + struct sctp_paddrthlds_v2 val; struct sctp_transport *trans; struct sctp_association *asoc; + int min;
- if (len < sizeof(struct sctp_paddrthlds)) + min = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds); + if (len < min) return -EINVAL; - len = sizeof(struct sctp_paddrthlds); - if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) + len = min; + if (copy_from_user(&val, optval, len)) return -EFAULT;
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { @@@ -7257,7 -7192,6 +7257,7 @@@
val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; + val.spt_pathcpthld = trans->ps_retrans;
goto out; } @@@ -7270,13 -7204,11 +7270,13 @@@ if (asoc) { val.spt_pathpfthld = asoc->pf_retrans; val.spt_pathmaxrxt = asoc->pathmaxrxt; + val.spt_pathcpthld = asoc->ps_retrans; } else { struct sctp_sock *sp = sctp_sk(sk);
val.spt_pathpfthld = sp->pf_retrans; val.spt_pathmaxrxt = sp->pathmaxrxt; + val.spt_pathcpthld = sp->ps_retrans; }
out: @@@ -7968,45 -7900,6 +7968,45 @@@ out return retval; }
+static int sctp_getsockopt_pf_expose(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ? asoc->pf_expose + : sctp_sk(sk)->pf_expose; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@@ -8156,12 -8049,7 +8156,12 @@@ retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); break; case SCTP_PEER_ADDR_THLDS: - retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen); + retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, + optlen, false); + break; + case SCTP_PEER_ADDR_THLDS_V2: + retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, + optlen, true); break; case SCTP_GET_ASSOC_STATS: retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen); @@@ -8224,9 -8112,6 +8224,9 @@@ case SCTP_ECN_SUPPORTED: retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen); break; + case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: + retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@@ -8267,7 -8152,6 +8267,7 @@@ static int sctp_get_port_local(struct s struct sctp_sock *sp = sctp_sk(sk); bool reuse = (sk->sk_reuse || sp->reuse); struct sctp_bind_hashbucket *head; /* hash list */ + struct net *net = sock_net(sk); kuid_t uid = sock_i_uid(sk); struct sctp_bind_bucket *pp; unsigned short snum; @@@ -8283,6 -8167,7 +8283,6 @@@ /* Search for an available port. */ int low, high, remaining, index; unsigned int rover; - struct net *net = sock_net(sk);
inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; @@@ -8294,12 -8179,12 +8294,12 @@@ rover = low; if (inet_is_local_reserved_port(net, rover)) continue; - index = sctp_phashfn(sock_net(sk), rover); + index = sctp_phashfn(net, rover); head = &sctp_port_hashtable[index]; spin_lock(&head->lock); sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && - net_eq(sock_net(sk), pp->net)) + net_eq(net, pp->net)) goto next; break; next: @@@ -8323,10 -8208,10 +8323,10 @@@ * to the port number (snum) - we detect that with the * port iterator, pp being NULL. */ - head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; + head = &sctp_port_hashtable[sctp_phashfn(net, snum)]; spin_lock(&head->lock); sctp_for_each_hentry(pp, &head->chain) { - if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) + if ((pp->port == snum) && net_eq(pp->net, net)) goto pp_found; } } @@@ -8382,7 -8267,7 +8382,7 @@@ pp_found pp_not_found: /* If there was a hash table miss, create a new port. */ ret = 1; - if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum))) + if (!pp && !(pp = sctp_bucket_create(head, net, snum))) goto fail_unlock;
/* In either case (hit or miss), make sure fastreuse is 1 only @@@ -8491,7 -8376,7 +8491,7 @@@ static int sctp_listen_start(struct soc } }
-	sk->sk_max_ack_backlog = backlog;
+	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
 	return sctp_hash_endpoint(ep);
 }
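sk_max_ack_backlog is read locklessly elsewhere, so plain stores become WRITE_ONCE() to pair with READ_ONCE() on the reader side and document the intentional data race. The pairing in miniature:

  static void set_backlog(struct sock *sk, int backlog)
  {
          WRITE_ONCE(sk->sk_max_ack_backlog, backlog);    /* writer */
  }

  static bool backlog_full(const struct sock *sk)
  {
          /* lockless reader; pairs with the WRITE_ONCE() above */
          return READ_ONCE(sk->sk_ack_backlog) >
                 READ_ONCE(sk->sk_max_ack_backlog);
  }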
@@@ -8545,7 -8430,7 +8545,7 @@@ int sctp_inet_listen(struct socket *soc
 	/* If we are already listening, just update the backlog */
 	if (sctp_sstate(sk, LISTENING))
-		sk->sk_max_ack_backlog = backlog;
+		WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
 	else {
 		err = sctp_listen_start(sk, backlog);
 		if (err)

@@@ -8591,7 -8476,7 +8591,7 @@@ __poll_t sctp_poll(struct file *file, s
 	mask = 0;

 	/* Are there any exceptional events? */
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 	if (sk->sk_shutdown & RCV_SHUTDOWN)

@@@ -8600,7 -8485,7 +8600,7 @@@
 		mask |= EPOLLHUP;

 	/* Is it readable?  Reconsider this code with TCP-style support. */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;

 	/* The association is either gone or not ready. */

@@@ -8986,7 -8871,7 +8986,7 @@@ struct sk_buff *sctp_skb_recv_datagram(
 		if (sk_can_busy_loop(sk)) {
 			sk_busy_loop(sk, noblock);

-			if (!skb_queue_empty(&sk->sk_receive_queue))
+			if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 				continue;
 		}
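skb_queue_empty_lockless() is the variant that may be called without holding the queue lock: it reads the head pointer with READ_ONCE() so poll and busy-loop paths can peek at a queue that is concurrently modified. Its core is essentially:

  static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
  {
          return READ_ONCE(list->next) == (const struct sk_buff *)list;
  }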
@@@ -9421,7 -9306,7 +9421,7 @@@ void sctp_copy_sock(struct sock *newsk newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = asoc->next_tsn ^ jiffies; + newinet->inet_id = prandom_u32();
newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@@ -9615,12 -9500,12 +9615,12 @@@ struct proto sctp_prot = .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp_sock), .useroffset = offsetof(struct sctp_sock, subscribe), .usersize = offsetof(struct sctp_sock, initmsg) - offsetof(struct sctp_sock, subscribe) + - sizeof_field(struct sctp_sock, initmsg), + sizeof_member(struct sctp_sock, initmsg), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, .sysctl_wmem = sysctl_sctp_wmem, @@@ -9657,12 -9542,12 +9657,12 @@@ struct proto sctpv6_prot = .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp6_sock), .useroffset = offsetof(struct sctp6_sock, sctp.subscribe), .usersize = offsetof(struct sctp6_sock, sctp.initmsg) - offsetof(struct sctp6_sock, sctp.subscribe) + - sizeof_field(struct sctp6_sock, sctp.initmsg), + sizeof_member(struct sctp6_sock, sctp.initmsg), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, .sysctl_wmem = sysctl_sctp_wmem, diff --combined net/unix/af_unix.c index 7cfdce10de36,ee9b2d8684c3..f0a074356012 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -284,9 -284,11 +284,9 @@@ static struct sock *__unix_find_socket_
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
-			goto found;
+			return s;
 	}
-	s = NULL;
-found:
-	return s;
+	return NULL;
 }
static inline struct sock *unix_find_socket_byname(struct net *net, @@@ -644,9 -646,6 +644,9 @@@ static __poll_t unix_poll(struct file * static __poll_t unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); +#ifdef CONFIG_COMPAT +static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +#endif static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); @@@ -688,9 -687,6 +688,9 @@@ static const struct proto_ops unix_stre .getname = unix_getname, .poll = unix_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@@ -714,9 -710,6 +714,9 @@@ static const struct proto_ops unix_dgra .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = sock_no_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@@ -739,9 -732,6 +739,9 @@@ static const struct proto_ops unix_seqp .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@@ -2592,13 -2582,6 +2592,13 @@@ static int unix_ioctl(struct socket *so return err; }
+#ifdef CONFIG_COMPAT
+static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;

 	/* Connection-based need to check for termination and startup */

@@@ -2645,7 -2628,7 +2645,7 @@@ static __poll_t unix_dgram_poll(struct
 	mask = 0;

 	/* exceptional events? */
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

@@@ -2655,7 -2638,7 +2655,7 @@@
 		mask |= EPOLLHUP;

 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;

 	/* Connection-based need to check for termination and startup */

@@@ -2865,7 -2848,7 +2865,7 @@@ static int __init af_unix_init(void
 {
 	int rc = -1;
-	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
+	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_member(struct sk_buff, cb));
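The BUILD_BUG_ON() above guards the long-standing pattern of keeping per-protocol state in the fixed 48-byte skb->cb[] scratch area. A sketch of that pattern with a hypothetical state struct:

  struct my_skb_state {
          u32 flow_id;
          u16 frag_off;
  };

  #define MY_SKB_CB(skb) ((struct my_skb_state *)((skb)->cb))

  static void tag_skb(struct sk_buff *skb, u32 id)
  {
          /* fails the build if the state ever outgrows skb->cb */
          BUILD_BUG_ON(sizeof(struct my_skb_state) >
                       sizeof_member(struct sk_buff, cb));
          MY_SKB_CB(skb)->flow_id = id;
  }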
 	rc = proto_register(&unix_proto, 1);
 	if (rc != 0) {

diff --combined security/integrity/ima/ima_policy.c
index f19a895ad7cd,18bf8e2d4f95..936cba30d27c
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@@ -45,7 -45,7 +45,7 @@@
 #define DONT_HASH	0x0200
 #define INVALID_PCR(a) (((a) < 0) || \
-	(a) >= (FIELD_SIZEOF(struct integrity_iint_cache, measured_pcrs) * 8))
+	(a) >= (sizeof_member(struct integrity_iint_cache, measured_pcrs) * 8))
 int ima_policy_flag;
 static int temp_ima_appraise;

@@@ -274,7 -274,7 +274,7 @@@ static struct ima_rule_entry *ima_lsm_c
 	 * lsm rules can change
 	 */
 	memcpy(nentry, entry, sizeof(*nentry));
-	memset(nentry->lsm, 0, FIELD_SIZEOF(struct ima_rule_entry, lsm));
+	memset(nentry->lsm, 0, sizeof_member(struct ima_rule_entry, lsm));
for (i = 0; i < MAX_LSM_RULES; i++) { if (!entry->lsm[i].rule) @@@ -765,8 -765,8 +765,8 @@@ enum Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq, Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt, Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt, - Opt_appraise_type, Opt_permit_directio, - Opt_pcr, Opt_template, Opt_err + Opt_appraise_type, Opt_appraise_flag, + Opt_permit_directio, Opt_pcr, Opt_template, Opt_err };
static const match_table_t policy_tokens = { @@@ -798,7 -798,6 +798,7 @@@ {Opt_euid_lt, "euid<%s"}, {Opt_fowner_lt, "fowner<%s"}, {Opt_appraise_type, "appraise_type=%s"}, + {Opt_appraise_flag, "appraise_flag=%s"}, {Opt_permit_directio, "permit_directio"}, {Opt_pcr, "pcr=%s"}, {Opt_template, "template=%s"}, @@@ -1173,11 -1172,6 +1173,11 @@@ static int ima_parse_rule(char *rule, s else result = -EINVAL; break; + case Opt_appraise_flag: + ima_log_string(ab, "appraise_flag", args[0].from); + if (strstr(args[0].from, "blacklist")) + entry->flags |= IMA_CHECK_BLACKLIST; + break; case Opt_permit_directio: entry->flags |= IMA_PERMIT_DIRECTIO; break; @@@ -1506,8 -1500,6 +1506,8 @@@ int ima_policy_show(struct seq_file *m else seq_puts(m, "appraise_type=imasig "); } + if (entry->flags & IMA_CHECK_BLACKLIST) + seq_puts(m, "appraise_flag=check_blacklist "); if (entry->flags & IMA_PERMIT_DIRECTIO) seq_puts(m, "permit_directio "); rcu_read_unlock(); diff --combined sound/soc/codecs/hdmi-codec.c index f8b5b960e597,c8b1ac22b2b2..841d1c608fe4 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@@ -274,7 -274,7 +274,7 @@@ struct hdmi_codec_priv uint8_t eld[MAX_ELD_BYTES]; struct snd_pcm_chmap *chmap_info; unsigned int chmap_idx; - struct mutex lock; + unsigned long busy; struct snd_soc_jack *jack; unsigned int jack_status; }; @@@ -292,7 -292,7 +292,7 @@@ static int hdmi_eld_ctl_info(struct snd struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES; - uinfo->count = FIELD_SIZEOF(struct hdmi_codec_priv, eld); + uinfo->count = sizeof_member(struct hdmi_codec_priv, eld);
 	return 0;
 }

@@@ -390,8 -390,8 +390,8 @@@ static int hdmi_codec_startup(struct sn
 	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
 	int ret = 0;

-	ret = mutex_trylock(&hcp->lock);
-	if (!ret) {
+	ret = test_and_set_bit(0, &hcp->busy);
+	if (ret) {
 		dev_err(dai->dev, "Only one simultaneous stream supported!\n");
 		return -EINVAL;
 	}

@@@ -419,7 -419,7 +419,7 @@@

 err:
 	/* Release the exclusive lock on error */
-	mutex_unlock(&hcp->lock);
+	clear_bit(0, &hcp->busy);
 	return ret;
 }
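The atomic busy bit keeps the one-stream-at-a-time exclusion without a sleeping lock; note the inverted return convention: test_and_set_bit() returns the old bit value (nonzero means already claimed), whereas mutex_trylock() returned nonzero on success. The flag pattern in isolation:

  static unsigned long busy;      /* bit 0: device claimed */

  static int claim_device(void)
  {
          if (test_and_set_bit(0, &busy))
                  return -EBUSY;  /* old bit was set: already claimed */
          return 0;
  }

  static void release_device(void)
  {
          clear_bit(0, &busy);    /* pairs with a successful claim */
  }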
@@@ -431,7 -431,7 +431,7 @@@ static void hdmi_codec_shutdown(struct
 	hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN;
 	hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data);

-	mutex_unlock(&hcp->lock);
+	clear_bit(0, &hcp->busy);
 }

 static int hdmi_codec_hw_params(struct snd_pcm_substream *substream,

@@@ -811,6 -811,8 +811,6 @@@ static int hdmi_codec_probe(struct plat
 		return -ENOMEM;
hcp->hcd = *hcd; - mutex_init(&hcp->lock); - daidrv = devm_kcalloc(dev, dai_count, sizeof(*daidrv), GFP_KERNEL); if (!daidrv) return -ENOMEM; diff --combined virt/kvm/kvm_main.c index 00268290dcbd,777d3b125072..74003e85f371 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@@ -50,7 -50,6 +50,7 @@@ #include <linux/bsearch.h> #include <linux/io.h> #include <linux/lockdep.h> +#include <linux/kthread.h>
 #include <asm/processor.h>
 #include <asm/ioctl.h>

@@@ -122,22 -121,9 +122,22 @@@ static long kvm_vcpu_compat_ioctl(struc
 			   unsigned long arg);
 #define KVM_COMPAT(c)	.compat_ioctl	= (c)
 #else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ *   passed to a compat task, let the ioctls fail.
+ */
 static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
 				unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c)	.compat_ioctl = kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+	return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl,	\
+			.open		= kvm_no_compat_open
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);

@@@ -163,30 -149,10 +163,30 @@@ __weak int kvm_arch_mmu_notifier_invali
 	return 0;
 }
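On !CONFIG_COMPAT architectures the new KVM_COMPAT() expansion wires up both defenses named in the comment. An equivalent sketch for any character device wanting the same behaviour (hypothetical fops):

  static int my_open(struct inode *inode, struct file *file)
  {
          /* first line of defense: no compat opener at all */
          return is_compat_task() ? -ENODEV : 0;
  }

  static long my_no_compat_ioctl(struct file *file, unsigned int ioctl,
                                 unsigned long arg)
  {
          /* second line: fd handed from a 64-bit opener to a compat task */
          return -EINVAL;
  }

  static const struct file_operations my_fops = {
          .owner          = THIS_MODULE,
          .open           = my_open,
          .compat_ioctl   = my_no_compat_ioctl,
  };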
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) +{ + /* + * The metadata used by is_zone_device_page() to determine whether or + * not a page is ZONE_DEVICE is guaranteed to be valid if and only if + * the device has been pinned, e.g. by get_user_pages(). WARN if the + * page_count() is zero to help detect bad usage of this helper. + */ + if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn)))) + return false; + + return is_zone_device_page(pfn_to_page(pfn)); +} + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { + /* + * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting + * perspective they are "normal" pages, albeit with slightly different + * usage rules. + */ if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return PageReserved(pfn_to_page(pfn)) && + !kvm_is_zone_device_pfn(pfn);
return true; } @@@ -659,28 -625,10 +659,28 @@@ static int kvm_create_vm_debugfs(struc return 0; }
+/* + * Called after the VM is otherwise initialized, but just before adding it to + * the vm_list. + */ +int __weak kvm_arch_post_init_vm(struct kvm *kvm) +{ + return 0; +} + +/* + * Called just after removing the VM from the vm_list, but before doing any + * other destruction. + */ +void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ +} + static struct kvm *kvm_create_vm(unsigned long type) { - int r, i; struct kvm *kvm = kvm_arch_alloc_vm(); + int r = -ENOMEM; + int i;
if (!kvm) return ERR_PTR(-ENOMEM); @@@ -692,50 -640,45 +692,50 @@@ mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices);
- r = kvm_arch_init_vm(kvm, type); - if (r) - goto out_err_no_disable; - - r = hardware_enable_all(); - if (r) - goto out_err_no_disable; - -#ifdef CONFIG_HAVE_KVM_IRQFD - INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); -#endif - BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
- r = -ENOMEM; + if (init_srcu_struct(&kvm->srcu)) + goto out_err_no_srcu; + if (init_srcu_struct(&kvm->irq_srcu)) + goto out_err_no_irq_srcu; + + refcount_set(&kvm->users_count, 1); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { struct kvm_memslots *slots = kvm_alloc_memslots(); + if (!slots) - goto out_err_no_srcu; + goto out_err_no_arch_destroy_vm; /* Generations must be different for each address space. */ slots->generation = i; rcu_assign_pointer(kvm->memslots[i], slots); }
- if (init_srcu_struct(&kvm->srcu)) - goto out_err_no_srcu; - if (init_srcu_struct(&kvm->irq_srcu)) - goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); if (!kvm->buses[i]) - goto out_err; + goto out_err_no_arch_destroy_vm; }
+ r = kvm_arch_init_vm(kvm, type); + if (r) + goto out_err_no_arch_destroy_vm; + + r = hardware_enable_all(); + if (r) + goto out_err_no_disable; + +#ifdef CONFIG_HAVE_KVM_IRQFD + INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); +#endif + r = kvm_init_mmu_notifier(kvm); + if (r) + goto out_err_no_mmu_notifier; + + r = kvm_arch_post_init_vm(kvm); if (r) goto out_err;
@@@ -748,24 -691,17 +748,24 @@@ return kvm;
out_err: - cleanup_srcu_struct(&kvm->irq_srcu); -out_err_no_irq_srcu: - cleanup_srcu_struct(&kvm->srcu); -out_err_no_srcu: +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + if (kvm->mmu_notifier.ops) + mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); +#endif +out_err_no_mmu_notifier: hardware_disable_all(); out_err_no_disable: - refcount_set(&kvm->users_count, 0); + kvm_arch_destroy_vm(kvm); +out_err_no_arch_destroy_vm: + WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); + cleanup_srcu_struct(&kvm->irq_srcu); +out_err_no_irq_srcu: + cleanup_srcu_struct(&kvm->srcu); +out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@@ -797,8 -733,6 +797,8 @@@ static void kvm_destroy_vm(struct kvm * mutex_lock(&kvm_lock); list_del(&kvm->vm_list); mutex_unlock(&kvm_lock); + kvm_arch_pre_destroy_vm(kvm); + kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); @@@ -838,18 -772,6 +838,18 @@@ void kvm_put_kvm(struct kvm *kvm } EXPORT_SYMBOL_GPL(kvm_put_kvm);
+/*
+ * Used to put a reference that was taken on behalf of an object associated
+ * with a user-visible file descriptor, e.g. a vcpu or device, if installation
+ * of the new file descriptor fails and the reference cannot be transferred to
+ * its final owner.  In such cases, the caller is still actively using @kvm and
+ * will fail miserably if the refcount unexpectedly hits zero.
+ */
+void kvm_put_kvm_no_destroy(struct kvm *kvm)
+{
+	WARN_ON(refcount_dec_and_test(&kvm->users_count));
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy);
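This helper exists for the fd-installation failure path used later in this diff: the reference taken for a new file descriptor must be dropped, but the caller still holds and uses the VM, so the count must not reach zero. The intended shape, with a hypothetical fd type and fops:

  static int create_thing_fd(struct kvm *kvm)
  {
          int fd;

          kvm_get_kvm(kvm);       /* reference owned by the new fd */
          fd = anon_inode_getfd("kvm-thing", &thing_fops, kvm,
                                O_RDWR | O_CLOEXEC);
          if (fd < 0)
                  kvm_put_kvm_no_destroy(kvm);    /* must not be last ref */
          return fd;
  }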
 static int kvm_vm_release(struct inode *inode, struct file *filp)
 {

@@@ -1931,7 -1853,7 +1931,7 @@@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty
 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-	if (!kvm_is_reserved_pfn(pfn)) {
+	if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
 		struct page *page = pfn_to_page(pfn);

 		SetPageDirty(page);

@@@ -1941,7 -1863,7 +1941,7 @@@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty)

 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 {
-	if (!kvm_is_reserved_pfn(pfn))
+	if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
 		mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);

@@@ -2438,23 -2360,20 +2438,23 @@@ out
 	kvm_arch_vcpu_unblocking(vcpu);
 	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- if (!vcpu_valid_wakeup(vcpu)) - shrink_halt_poll_ns(vcpu); - else if (halt_poll_ns) { - if (block_ns <= vcpu->halt_poll_ns) - ; - /* we had a long block, shrink polling */ - else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + if (!kvm_arch_no_poll(vcpu)) { + if (!vcpu_valid_wakeup(vcpu)) { shrink_halt_poll_ns(vcpu); - /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < halt_poll_ns && - block_ns < halt_poll_ns) - grow_halt_poll_ns(vcpu); - } else - vcpu->halt_poll_ns = 0; + } else if (halt_poll_ns) { + if (block_ns <= vcpu->halt_poll_ns) + ; + /* we had a long block, shrink polling */ + else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + shrink_halt_poll_ns(vcpu); + /* we had a short halt and our poll time is too small */ + else if (vcpu->halt_poll_ns < halt_poll_ns && + block_ns < halt_poll_ns) + grow_halt_poll_ns(vcpu); + } else { + vcpu->halt_poll_ns = 0; + } + }
trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu)); kvm_arch_vcpu_block_finish(vcpu); @@@ -2751,18 -2670,17 +2751,18 @@@ static int kvm_vm_ioctl_create_vcpu(str goto unlock_vcpu_destroy; }
- BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); + vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); + BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
/* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); if (r < 0) { - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); goto unlock_vcpu_destroy; }
- kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; + kvm->vcpus[vcpu->vcpu_idx] = vcpu;
/* * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus @@@ -3128,14 -3046,14 +3128,14 @@@ struct kvm_device *kvm_device_from_filp return filp->private_data; }
-static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { +static const struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { #ifdef CONFIG_KVM_MPIC [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, #endif };
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) +int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type) { if (type >= ARRAY_SIZE(kvm_device_ops_table)) return -ENOSPC; @@@ -3156,7 -3074,7 +3156,7 @@@ void kvm_unregister_device_ops(u32 type static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_create_device *cd) { - struct kvm_device_ops *ops = NULL; + const struct kvm_device_ops *ops = NULL; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; int type; @@@ -3196,7 -3114,7 +3196,7 @@@ kvm_get_kvm(kvm); ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); @@@ -4354,12 -4272,12 +4354,12 @@@ int kvm_init(void *opaque, unsigned vcp
r = kvm_arch_hardware_setup(); if (r < 0) - goto out_free_0a; + goto out_free_1;
for_each_online_cpu(cpu) { smp_call_function_single(cpu, check_processor_compat, &r, 1); if (r < 0) - goto out_free_1; + goto out_free_2; }
r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting", @@@ -4375,7 -4293,7 +4375,7 @@@ kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align, SLAB_ACCOUNT, offsetof(struct kvm_vcpu, arch), - sizeof_field(struct kvm_vcpu, arch), + sizeof_member(struct kvm_vcpu, arch), NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; @@@ -4416,8 -4334,9 +4416,8 @@@ out_free_3 unregister_reboot_notifier(&kvm_reboot_notifier); cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); out_free_2: -out_free_1: kvm_arch_hardware_unsetup(); -out_free_0a: +out_free_1: free_cpumask_var(cpus_hardware_enabled); out_free_0: kvm_irqfd_exit(); @@@ -4445,86 -4364,3 +4445,86 @@@ void kvm_exit(void kvm_vfio_ops_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); + +struct kvm_vm_worker_thread_context { + struct kvm *kvm; + struct task_struct *parent; + struct completion init_done; + kvm_vm_thread_fn_t thread_fn; + uintptr_t data; + int err; +}; + +static int kvm_vm_worker_thread(void *context) +{ + /* + * The init_context is allocated on the stack of the parent thread, so + * we have to locally copy anything that is needed beyond initialization + */ + struct kvm_vm_worker_thread_context *init_context = context; + struct kvm *kvm = init_context->kvm; + kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; + uintptr_t data = init_context->data; + int err; + + err = kthread_park(current); + /* kthread_park(current) is never supposed to return an error */ + WARN_ON(err != 0); + if (err) + goto init_complete; + + err = cgroup_attach_task_all(init_context->parent, current); + if (err) { + kvm_err("%s: cgroup_attach_task_all failed with err %d\n", + __func__, err); + goto init_complete; + } + + set_user_nice(current, task_nice(init_context->parent)); + +init_complete: + init_context->err = err; + complete(&init_context->init_done); + init_context = NULL; + + if (err) + return err; + + /* Wait to be woken up by the spawner before proceeding. */ + kthread_parkme(); + + if (!kthread_should_stop()) + err = thread_fn(kvm, data); + + return err; +} + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr) +{ + struct kvm_vm_worker_thread_context init_context = {}; + struct task_struct *thread; + + *thread_ptr = NULL; + init_context.kvm = kvm; + init_context.parent = current; + init_context.thread_fn = thread_fn; + init_context.data = data; + init_completion(&init_context.init_done); + + thread = kthread_run(kvm_vm_worker_thread, &init_context, + "%s-%d", name, task_pid_nr(current)); + if (IS_ERR(thread)) + return PTR_ERR(thread); + + /* kthread_run is never supposed to return NULL */ + WARN_ON(thread == NULL); + + wait_for_completion(&init_context.init_done); + + if (!init_context.err) + *thread_ptr = thread; + + return init_context.err; +}
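A hypothetical caller, to illustrate the contract of the new helper: kvm_vm_create_worker_thread() returns only after the worker has parked (or failed its setup), *thread_ptr is set only on success, and the spawner later unparks the thread to let thread_fn run:

  static int my_work_fn(struct kvm *kvm, uintptr_t data)
  {
          /* runs in the worker once the spawner unparks it */
          return 0;
  }

  static int start_my_worker(struct kvm *kvm)
  {
          struct task_struct *thread;
          int err;

          err = kvm_vm_create_worker_thread(kvm, my_work_fn, 0,
                                            "my-worker", &thread);
          if (err)
                  return err;

          kthread_unpark(thread);         /* lets my_work_fn() run */
          return 0;
  }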