The following commit has been merged in the master branch:
commit 0a6eab70c5b8bc438cc85a997e3ee43cd9f2cf3d
Merge: eedcc9b50b62be43cf07b00187038a3bb0201223 ec2f877856e0af3889940e00b160f7f20f8d774f
Author: Stephen Rothwell <sfr(a)canb.auug.org.au>
Date: Fri Nov 29 12:20:58 2019 +1100
Merge remote-tracking branch 'kspp/for-next/kspp'
# Conflicts:
# arch/x86/kernel/fpu/xstate.c
diff --combined Documentation/process/coding-style.rst
index ada573b7d703,146b3a2c661c..e7c60f5339b2
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@@ -56,7 -56,7 +56,7 @@@ instead of ``double-indenting`` the ``c
case 'K':
case 'k':
mem <<= 10;
- /* fall through */
+ fallthrough;
default:
break;
}
@@@ -988,7 -988,7 +988,7 @@@ Similarly, if you need to calculate th
.. code-block:: c
- #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+ #define sizeof_member(t, f) (sizeof(((t*)0)->f))
There are also min() and max() macros that do strict type checking if you
need them. Feel free to peruse that header file to see what else is already
diff --combined arch/arm64/include/asm/processor.h
index 5ba63204d078,2d61a909984d..257b742b960d
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@@ -9,7 -9,7 +9,7 @@@
#define __ASM_PROCESSOR_H
#define KERNEL_DS UL(-1)
-#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1)
+#define USER_DS ((UL(1) << VA_BITS) - 1)
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
@@@ -26,12 -26,10 +26,12 @@@
#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/thread_info.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/hw_breakpoint.h>
+#include <asm/kasan.h>
#include <asm/lse.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pointer_auth.h>
@@@ -154,13 -152,13 +154,13 @@@ static inline void arch_thread_struct_w
unsigned long *size)
{
/* Verify that there is no padding among the whitelisted fields: */
- BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
- sizeof_field(struct thread_struct, uw.tp_value) +
- sizeof_field(struct thread_struct, uw.tp2_value) +
- sizeof_field(struct thread_struct, uw.fpsimd_state));
+ BUILD_BUG_ON(sizeof_member(struct thread_struct, uw) !=
+ sizeof_member(struct thread_struct, uw.tp_value) +
+ sizeof_member(struct thread_struct, uw.tp2_value) +
+ sizeof_member(struct thread_struct, uw.fpsimd_state));
*offset = offsetof(struct thread_struct, uw);
- *size = sizeof_field(struct thread_struct, uw);
+ *size = sizeof_member(struct thread_struct, uw);
}
#ifdef CONFIG_COMPAT
@@@ -216,18 -214,6 +216,18 @@@ static inline void start_thread(struct
regs->sp = sp;
}
+static inline bool is_ttbr0_addr(unsigned long addr)
+{
+ /* entry assembly clears tags for TTBR0 addrs */
+ return addr < TASK_SIZE;
+}
+
+static inline bool is_ttbr1_addr(unsigned long addr)
+{
+ /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
+
#ifdef CONFIG_COMPAT
static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
diff --combined arch/x86/kernel/fpu/xstate.c
index 319be936c348,023b0a28e13b..4b03ce58537d
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@@ -60,7 -60,7 +60,7 @@@ u64 xfeatures_mask __read_mostly
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@@ -254,13 -254,10 +254,13 @@@ static void __init setup_xstate_feature
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_offsets[0] = 0;
- xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
- xstate_offsets[1] = xstate_sizes[0];
- xstate_sizes[1] = sizeof_member(struct fxregs_state, xmm_space);
+ xstate_offsets[XFEATURE_FP] = 0;
+ xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
+ xmm_space);
+
+ xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
- xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state,
- xmm_space);
++ xstate_sizes[XFEATURE_SSE] = sizeof_member(struct fxregs_state,
++ xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!xfeature_enabled(i))
@@@ -345,7 -342,7 +345,7 @@@ static int xfeature_is_aligned(int xfea
*/
static void __init setup_xstate_comp(void)
{
- unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
+ unsigned int xstate_comp_sizes[XFEATURE_MAX];
int i;
/*
@@@ -353,9 -350,8 +353,9 @@@
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_comp_offsets[0] = 0;
- xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
+ xstate_comp_offsets[XFEATURE_FP] = 0;
+ xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
+ xmm_space);
if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
@@@ -844,7 -840,7 +844,7 @@@ void *get_xsave_addr(struct xregs_stat
/*
* We should not ever be requesting features that we
- * have not enabled. Remember that pcntxt_mask is
+ * have not enabled. Remember that xfeatures_mask is
* what we write to the XCR0 register.
*/
WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
diff --combined block/blk-core.c
index 9c121f142a5d,fd6b31d400e7..550e80a955f2
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@@ -132,9 -132,6 +132,9 @@@ static const char *const blk_op_name[]
REQ_OP_NAME(SECURE_ERASE),
REQ_OP_NAME(ZONE_RESET),
REQ_OP_NAME(ZONE_RESET_ALL),
+ REQ_OP_NAME(ZONE_OPEN),
+ REQ_OP_NAME(ZONE_CLOSE),
+ REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@@ -187,22 -184,6 +187,22 @@@ static const struct
[BLK_STS_IOERR] = { -EIO, "I/O" },
};
+#ifdef CONFIG_BLK_NOTIFICATIONS
+static const
+enum block_notification_type blk_notifications[ARRAY_SIZE(blk_errors)] = {
+ [BLK_STS_TIMEOUT] = NOTIFY_BLOCK_ERROR_TIMEOUT,
+ [BLK_STS_NOSPC] = NOTIFY_BLOCK_ERROR_NO_SPACE,
+ [BLK_STS_TRANSPORT] = NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT,
+ [BLK_STS_TARGET] = NOTIFY_BLOCK_ERROR_CRITICAL_TARGET,
+ [BLK_STS_NEXUS] = NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS,
+ [BLK_STS_MEDIUM] = NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM,
+ [BLK_STS_PROTECTION] = NOTIFY_BLOCK_ERROR_PROTECTION,
+ [BLK_STS_RESOURCE] = NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE,
+ [BLK_STS_DEV_RESOURCE] = NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE,
+ [BLK_STS_IOERR] = NOTIFY_BLOCK_ERROR_IO,
+};
+#endif
+
blk_status_t errno_to_blk_status(int errno)
{
int i;
@@@ -243,19 -224,6 +243,19 @@@ static void print_req_error(struct requ
req->cmd_flags & ~REQ_OP_MASK,
req->nr_phys_segments,
IOPRIO_PRIO_CLASS(req->ioprio));
+
+#ifdef CONFIG_BLK_NOTIFICATIONS
+ if (blk_notifications[idx]) {
+ struct block_notification n = {
+ .watch.type = WATCH_TYPE_BLOCK_NOTIFY,
+ .watch.subtype = blk_notifications[idx],
+ .watch.info = watch_sizeof(n),
+ .dev = req->rq_disk ? disk_devt(req->rq_disk) : 0,
+ .sector = blk_rq_pos(req),
+ };
+ post_block_notification(&n);
+ }
+#endif
}
static void req_bio_endio(struct request *rq, struct bio *bio,
@@@ -368,14 -336,14 +368,14 @@@ EXPORT_SYMBOL_GPL(blk_set_queue_dying)
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ WARN_ON_ONCE(blk_queue_registered(q));
+
/* mark @q DYING, no new request or merges will be allowed afterwards */
- mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- mutex_unlock(&q->sysfs_lock);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
@@@ -880,7 -848,11 +880,7 @@@ static inline int blk_partition_remap(s
if (unlikely(bio_check_ro(bio, p)))
goto out;
- /*
- * Zone reset does not include bi_size so bio_sectors() is always 0.
- * Include a test for the reset op code and perform the remap if needed.
- */
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+ if (bio_sectors(bio)) {
if (bio_check_eod(bio, part_nr_sects_read(p)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
@@@ -964,9 -936,6 +964,9 @@@ generic_make_request_checks(struct bio
goto not_supported;
break;
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
@@@ -1821,9 -1790,9 +1821,9 @@@ int __init blk_dev_init(void
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
- FIELD_SIZEOF(struct request, cmd_flags));
+ sizeof_member(struct request, cmd_flags));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
- FIELD_SIZEOF(struct bio, bi_opf));
+ sizeof_member(struct bio, bi_opf));
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
diff --combined crypto/adiantum.c
index aded26092268,d79d901afd28..cb7ba22e3a62
--- a/crypto/adiantum.c
+++ b/crypto/adiantum.c
@@@ -33,7 -33,6 +33,7 @@@
#include <crypto/b128ops.h>
#include <crypto/chacha.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
#include <crypto/internal/skcipher.h>
#include <crypto/nhpoly1305.h>
#include <crypto/scatterwalk.h>
@@@ -243,11 -242,11 +243,11 @@@ static void adiantum_hash_header(struc
BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
poly1305_core_blocks(&state, &tctx->header_hash_key,
- &header, sizeof(header) / POLY1305_BLOCK_SIZE);
+ &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
- TWEAK_SIZE / POLY1305_BLOCK_SIZE);
+ TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
poly1305_core_emit(&state, &rctx->header_hash);
}
@@@ -436,10 -435,10 +436,10 @@@ static int adiantum_init_tfm(struct cry
BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) !=
sizeof(struct adiantum_request_ctx));
- subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx,
+ subreq_size = max(sizeof_member(struct adiantum_request_ctx,
u.hash_desc) +
crypto_shash_descsize(hash),
- FIELD_SIZEOF(struct adiantum_request_ctx,
+ sizeof_member(struct adiantum_request_ctx,
u.streamcipher_req) +
crypto_skcipher_reqsize(streamcipher));
diff --combined crypto/essiv.c
index 808f2b362106,fd5a87baafff..75d810d98f8e
--- a/crypto/essiv.c
+++ b/crypto/essiv.c
@@@ -188,7 -188,8 +188,7 @@@ static void essiv_aead_done(struct cryp
struct aead_request *req = areq->data;
struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
- if (rctx->assoc)
- kfree(rctx->assoc);
+ kfree(rctx->assoc);
aead_request_complete(req, err);
}
@@@ -347,7 -348,7 +347,7 @@@ static int essiv_aead_init_tfm(struct c
if (IS_ERR(aead))
return PTR_ERR(aead);
- subreq_size = FIELD_SIZEOF(struct essiv_aead_request_ctx, aead_req) +
+ subreq_size = sizeof_member(struct essiv_aead_request_ctx, aead_req) +
crypto_aead_reqsize(aead);
tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) +
@@@ -485,7 -486,7 +485,7 @@@ static int essiv_create(struct crypto_t
type = algt->type & algt->mask;
switch (type) {
- case CRYPTO_ALG_TYPE_BLKCIPHER:
+ case CRYPTO_ALG_TYPE_SKCIPHER:
skcipher_inst = kzalloc(sizeof(*skcipher_inst) +
sizeof(*ictx), GFP_KERNEL);
if (!skcipher_inst)
@@@ -585,7 -586,7 +585,7 @@@
base->cra_alignmask = block_base->cra_alignmask;
base->cra_priority = block_base->cra_priority;
- if (type == CRYPTO_ALG_TYPE_BLKCIPHER) {
+ if (type == CRYPTO_ALG_TYPE_SKCIPHER) {
skcipher_inst->alg.setkey = essiv_skcipher_setkey;
skcipher_inst->alg.encrypt = essiv_skcipher_encrypt;
skcipher_inst->alg.decrypt = essiv_skcipher_decrypt;
@@@ -627,7 -628,7 +627,7 @@@
out_free_hash:
crypto_mod_put(_hash_alg);
out_drop_skcipher:
- if (type == CRYPTO_ALG_TYPE_BLKCIPHER)
+ if (type == CRYPTO_ALG_TYPE_SKCIPHER)
crypto_drop_skcipher(&ictx->u.skcipher_spawn);
else
crypto_drop_aead(&ictx->u.aead_spawn);
diff --combined drivers/firmware/efi/efi.c
index d101f072c8f8,17d32fb72c9a..0b6b0c19a189
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@@ -81,11 -81,6 +81,11 @@@ bool efi_runtime_disabled(void
return disable_runtime;
}
+bool __pure __efi_soft_reserve_enabled(void)
+{
+ return !efi_enabled(EFI_MEM_NO_SOFT_RESERVE);
+}
+
static int __init parse_efi_cmdline(char *str)
{
if (!str) {
@@@ -99,9 -94,6 +99,9 @@@
if (parse_option_str(str, "noruntime"))
disable_runtime = true;
+ if (parse_option_str(str, "nosoftreserve"))
+ set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
+
return 0;
}
early_param("efi", parse_efi_cmdline);
@@@ -275,9 -267,6 +275,9 @@@ static __init int efivar_ssdt_load(void
void *data;
int ret;
+ if (!efivar_ssdt[0])
+ return 0;
+
ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
list_for_each_entry_safe(entry, aux, &entries, list) {
@@@ -304,7 -293,7 +304,7 @@@
goto free_data;
}
- ret = acpi_load_table(data);
+ ret = acpi_load_table(data, NULL);
if (ret) {
pr_err("failed to load table: %d\n", ret);
goto free_data;
@@@ -562,7 -551,7 +562,7 @@@ int __init efi_config_parse_tables(voi
sizeof(*seed) + size);
if (seed != NULL) {
pr_notice("seeding entropy pool\n");
- add_device_randomness(seed->bits, seed->size);
+ add_bootloader_randomness(seed->bits, seed->size);
early_memunmap(seed, sizeof(*seed) + size);
} else {
pr_err("Could not map UEFI random seed!\n");
@@@ -681,7 -670,7 +681,7 @@@ device_initcall(efi_load_efivars)
{ name }, \
{ prop }, \
offsetof(struct efi_fdt_params, field), \
- FIELD_SIZEOF(struct efi_fdt_params, field) \
+ sizeof_member(struct efi_fdt_params, field) \
}
struct params {
@@@ -850,16 -839,15 +850,16 @@@ char * __init efi_md_typeattr_format(ch
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
- EFI_MEMORY_NV |
+ EFI_MEMORY_NV | EFI_MEMORY_SP |
EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]",
(unsigned long long)attr);
else
snprintf(pos, size,
- "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+ "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+ attr & EFI_MEMORY_SP ? "SP" : "",
attr & EFI_MEMORY_NV ? "NV" : "",
attr & EFI_MEMORY_XP ? "XP" : "",
attr & EFI_MEMORY_RP ? "RP" : "",
diff --combined drivers/gpu/drm/i915/gvt/scheduler.c
index 5b2a7d072ec9,76c2db58adcd..12c69712c5b6
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@@ -38,7 -38,6 +38,7 @@@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_context.h"
+#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "gvt.h"
@@@ -195,7 -194,7 +195,7 @@@ static int populate_shadow_context(stru
return -EFAULT;
}
- page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
+ page = i915_gem_object_get_page(ctx_obj, i);
dst = kmap(page);
intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
I915_GTT_PAGE_SIZE);
@@@ -366,8 -365,7 +366,8 @@@ static void set_context_ppgtt_from_shad
struct i915_gem_context *ctx)
{
struct intel_vgpu_mm *mm = workload->shadow_mm;
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm);
+ struct i915_ppgtt *ppgtt =
+ i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx));
int i = 0;
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
@@@ -380,8 -378,6 +380,8 @@@
px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
}
}
+
+ i915_vm_put(&ppgtt->vm);
}
static int
@@@ -389,8 -385,11 +389,8 @@@ intel_gvt_workload_req_alloc(struct int
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct i915_request *rq;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
if (workload->req)
return 0;
@@@ -416,9 -415,10 +416,9 @@@ int intel_gvt_scan_and_shadow_workload(
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ lockdep_assert_held(&vgpu->vgpu_lock);
if (workload->shadow)
return 0;
@@@ -580,6 -580,8 +580,6 @@@ static void update_vreg_in_ctx(struct i
static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
- struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct intel_vgpu_shadow_bb *bb, *pos;
if (list_empty(&workload->shadow_bb))
@@@ -588,6 -590,8 +588,6 @@@
bb = list_first_entry(&workload->shadow_bb,
struct intel_vgpu_shadow_bb, list);
- mutex_lock(&dev_priv->drm.struct_mutex);
-
list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
if (bb->obj) {
if (bb->accessing)
@@@ -605,6 -609,8 +605,6 @@@
list_del(&bb->list);
kfree(bb);
}
-
- mutex_unlock(&dev_priv->drm.struct_mutex);
}
static int prepare_workload(struct intel_vgpu_workload *workload)
@@@ -679,6 -685,7 +679,6 @@@ err_unpin_mm
static int dispatch_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct i915_request *rq;
int ring_id = workload->ring_id;
int ret;
@@@ -687,6 -694,7 +687,6 @@@
ring_id, workload);
mutex_lock(&vgpu->vgpu_lock);
- mutex_lock(&dev_priv->drm.struct_mutex);
ret = intel_gvt_workload_req_alloc(workload);
if (ret)
@@@ -721,6 -729,7 +721,6 @@@ out
err_req:
if (ret)
workload->status = ret;
- mutex_unlock(&dev_priv->drm.struct_mutex);
mutex_unlock(&vgpu->vgpu_lock);
return ret;
}
@@@ -835,7 -844,7 +835,7 @@@ static void update_guest_context(struc
return;
}
- page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
+ page = i915_gem_object_get_page(ctx_obj, i);
src = kmap(page);
intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
I915_GTT_PAGE_SIZE);
@@@ -878,7 -887,7 +878,7 @@@ void intel_vgpu_clean_workloads(struct
intel_engine_mask_t tmp;
/* free the unsubmited workloads in the queues. */
- for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
list_for_each_entry_safe(pos, n,
&s->workload_q_head[engine->id], list) {
list_del_init(&pos->list);
@@@ -1224,18 -1233,20 +1224,18 @@@ int intel_vgpu_setup_submission(struct
struct intel_vgpu_submission *s = &vgpu->submission;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
+ struct i915_ppgtt *ppgtt;
enum intel_engine_id i;
int ret;
- mutex_lock(&i915->drm.struct_mutex);
-
ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX);
- if (IS_ERR(ctx)) {
- ret = PTR_ERR(ctx);
- goto out_unlock;
- }
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
i915_gem_context_set_force_single_submission(ctx);
- i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
+ ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx));
+ i915_context_ppgtt_root_save(s, ppgtt);
for_each_engine(engine, i915, i) {
struct intel_context *ce;
@@@ -1269,7 -1280,7 +1269,7 @@@
sizeof(struct intel_vgpu_workload), 0,
SLAB_HWCACHE_ALIGN,
offsetof(struct intel_vgpu_workload, rb_tail),
- sizeof_field(struct intel_vgpu_workload, rb_tail),
+ sizeof_member(struct intel_vgpu_workload, rb_tail),
NULL);
if (!s->workloads) {
@@@ -1280,12 -1291,12 +1280,12 @@@
atomic_set(&s->running_workload_num, 0);
bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+ i915_vm_put(&ppgtt->vm);
i915_gem_context_put(ctx);
- mutex_unlock(&i915->drm.struct_mutex);
return 0;
out_shadow_ctx:
- i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm));
+ i915_context_ppgtt_root_restore(s, ppgtt);
for_each_engine(engine, i915, i) {
if (IS_ERR(s->shadow[i]))
break;
@@@ -1293,8 -1304,9 +1293,8 @@@
intel_context_unpin(s->shadow[i]);
intel_context_put(s->shadow[i]);
}
+ i915_vm_put(&ppgtt->vm);
i915_gem_context_put(ctx);
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
return ret;
}
@@@ -1585,7 -1597,9 +1585,7 @@@ intel_vgpu_create_workload(struct intel
*/
if (list_empty(workload_q_head(vgpu, ring_id))) {
intel_runtime_pm_get(&dev_priv->runtime_pm);
- mutex_lock(&dev_priv->drm.struct_mutex);
ret = intel_gvt_scan_and_shadow_workload(workload);
- mutex_unlock(&dev_priv->drm.struct_mutex);
intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
}
diff --combined drivers/infiniband/hw/efa/efa_verbs.c
index c9d294caa27a,d5b396913875..0d4177c51881
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@@ -13,6 -13,10 +13,6 @@@
#include "efa.h"
-#define EFA_MMAP_FLAG_SHIFT 56
-#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
-#define EFA_MMAP_INVALID U64_MAX
-
enum {
EFA_MMAP_DMA_PAGE = 0,
EFA_MMAP_IO_WC,
@@@ -23,12 -27,20 +23,12 @@@
(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-struct efa_mmap_entry {
- void *obj;
+struct efa_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
u64 address;
- u64 length;
- u32 mmap_page;
u8 mmap_flag;
};
-static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
-{
- return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
- ((u64)efa->mmap_page << PAGE_SHIFT);
-}
-
#define EFA_DEFINE_STATS(op) \
op(EFA_TX_BYTES, "tx_bytes") \
op(EFA_TX_PKTS, "tx_pkts") \
@@@ -70,6 -82,8 +70,6 @@@ static const char *const efa_stats_name
#define EFA_CHUNK_USED_SIZE \
((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
-#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
-
struct pbl_chunk {
dma_addr_t dma_addr;
u64 *buf;
@@@ -133,19 -147,8 +133,19 @@@ static inline struct efa_ah *to_eah(str
return container_of(ibah, struct efa_ah, ibah);
}
+static inline struct efa_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
+}
+
+static inline bool is_rdma_read_cap(struct efa_dev *dev)
+{
+ return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
+}
+
#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
- FIELD_SIZEOF(typeof(x), fld) <= (sz))
+ sizeof_member(typeof(x), fld) <= (sz))
#define is_reserved_cleared(reserved) \
!memchr_inv(reserved, 0, sizeof(reserved))
@@@ -169,6 -172,106 +169,6 @@@ static void *efa_zalloc_mapped(struct e
return addr;
}
-/*
- * This is only called when the ucontext is destroyed and there can be no
- * concurrent query via mmap or allocate on the xarray, thus we can be sure no
- * other thread is using the entry pointer. We also know that all the BAR
- * pages have either been zap'd or munmaped at this point. Normal pages are
- * refcounted and will be freed at the proper time.
- */
-static void mmap_entries_remove_free(struct efa_dev *dev,
- struct efa_ucontext *ucontext)
-{
- struct efa_mmap_entry *entry;
- unsigned long mmap_page;
-
- xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
- xa_erase(&ucontext->mmap_xa, mmap_page);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, get_mmap_key(entry), entry->address,
- entry->length);
- if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
- /* DMA mapping is already gone, now free the pages */
- free_pages_exact(phys_to_virt(entry->address),
- entry->length);
- kfree(entry);
- }
-}
-
-static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
- struct efa_ucontext *ucontext,
- u64 key, u64 len)
-{
- struct efa_mmap_entry *entry;
- u64 mmap_page;
-
- mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
- if (mmap_page > U32_MAX)
- return NULL;
-
- entry = xa_load(&ucontext->mmap_xa, mmap_page);
- if (!entry || get_mmap_key(entry) != key || entry->length != len)
- return NULL;
-
- ibdev_dbg(&dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, key, entry->address, entry->length);
-
- return entry;
-}
-
-/*
- * Note this locking scheme cannot support removal of entries, except during
- * ucontext destruction when the core code guarentees no concurrency.
- */
-static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
- void *obj, u64 address, u64 length, u8 mmap_flag)
-{
- struct efa_mmap_entry *entry;
- u32 next_mmap_page;
- int err;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return EFA_MMAP_INVALID;
-
- entry->obj = obj;
- entry->address = address;
- entry->length = length;
- entry->mmap_flag = mmap_flag;
-
- xa_lock(&ucontext->mmap_xa);
- if (check_add_overflow(ucontext->mmap_xa_page,
- (u32)(length >> PAGE_SHIFT),
- &next_mmap_page))
- goto err_unlock;
-
- entry->mmap_page = ucontext->mmap_xa_page;
- ucontext->mmap_xa_page = next_mmap_page;
- err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
- GFP_KERNEL);
- if (err)
- goto err_unlock;
-
- xa_unlock(&ucontext->mmap_xa);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
- entry->obj, entry->address, entry->length, get_mmap_key(entry));
-
- return get_mmap_key(entry);
-
-err_unlock:
- xa_unlock(&ucontext->mmap_xa);
- kfree(entry);
- return EFA_MMAP_INVALID;
-
-}
-
int efa_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *udata)
@@@ -203,17 -306,12 +203,17 @@@
dev_attr->max_rq_depth);
props->max_send_sge = dev_attr->max_sq_sge;
props->max_recv_sge = dev_attr->max_rq_sge;
+ props->max_sge_rd = dev_attr->max_wr_rdma_sge;
if (udata && udata->outlen) {
resp.max_sq_sge = dev_attr->max_sq_sge;
resp.max_rq_sge = dev_attr->max_rq_sge;
resp.max_sq_wr = dev_attr->max_sq_depth;
resp.max_rq_wr = dev_attr->max_rq_depth;
+ resp.max_rdma_size = dev_attr->max_rdma_size;
+
+ if (is_rdma_read_cap(dev))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
@@@ -240,9 -338,9 +240,9 @@@ int efa_query_port(struct ib_device *ib
props->pkey_tbl_len = 1;
props->active_speed = IB_SPEED_EDR;
props->active_width = IB_WIDTH_4X;
- props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
- props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
- props->max_msg_sz = dev->mtu;
+ props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->max_msg_sz = dev->dev_attr.mtu;
props->max_vl_num = 1;
return 0;
@@@ -303,7 -401,7 +303,7 @@@ int efa_query_gid(struct ib_device *ibd
{
struct efa_dev *dev = to_edev(ibdev);
- memcpy(gid->raw, dev->addr, sizeof(dev->addr));
+ memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
return 0;
}
@@@ -387,19 -485,8 +387,19 @@@ static int efa_destroy_qp_handle(struc
return efa_com_destroy_qp(&dev->edev, &params);
}
+static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx,
+ struct efa_qp *qp)
+{
+ rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
+}
+
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
+ struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
+ struct efa_ucontext, ibucontext);
struct efa_dev *dev = to_edev(ibqp->pd->device);
struct efa_qp *qp = to_eqp(ibqp);
int err;
@@@ -418,101 -505,61 +418,101 @@@
DMA_TO_DEVICE);
}
+ efa_qp_user_mmap_entries_remove(ucontext, qp);
kfree(qp);
return 0;
}
+static struct rdma_user_mmap_entry*
+efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ u64 address, size_t length,
+ u8 mmap_flag, u64 *offset)
+{
+ struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ int err;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_flag = mmap_flag;
+
+ err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
+ length);
+ if (err) {
+ kfree(entry);
+ return NULL;
+ }
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
static int qp_mmap_entries_setup(struct efa_qp *qp,
struct efa_dev *dev,
struct efa_ucontext *ucontext,
struct efa_com_create_qp_params *params,
struct efa_ibv_create_qp_resp *resp)
{
- /*
- * Once an entry is inserted it might be mmapped, hence cannot be
- * cleaned up until dealloc_ucontext.
- */
- resp->sq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->sq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+ size_t length;
+ u64 address;
+
+ address = dev->db_bar_addr + resp->sq_db_offset;
+ qp->sq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address,
+ PAGE_SIZE, EFA_MMAP_IO_NC,
+ &resp->sq_db_mmap_key);
+ if (!qp->sq_db_mmap_entry)
return -ENOMEM;
resp->sq_db_offset &= ~PAGE_MASK;
- resp->llq_desc_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->mem_bar_addr + resp->llq_desc_offset,
- PAGE_ALIGN(params->sq_ring_size_in_bytes +
- (resp->llq_desc_offset & ~PAGE_MASK)),
- EFA_MMAP_IO_WC);
- if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = dev->mem_bar_addr + resp->llq_desc_offset;
+ length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
+ (resp->llq_desc_offset & ~PAGE_MASK));
+
+ qp->llq_desc_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, length,
+ EFA_MMAP_IO_WC,
+ &resp->llq_desc_mmap_key);
+ if (!qp->llq_desc_mmap_entry)
+ goto err_remove_mmap;
resp->llq_desc_offset &= ~PAGE_MASK;
if (qp->rq_size) {
- resp->rq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->rq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = dev->db_bar_addr + resp->rq_db_offset;
+
+ qp->rq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, PAGE_SIZE,
+ EFA_MMAP_IO_NC,
+ &resp->rq_db_mmap_key);
+ if (!qp->rq_db_mmap_entry)
+ goto err_remove_mmap;
resp->rq_db_offset &= ~PAGE_MASK;
- resp->rq_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- virt_to_phys(qp->rq_cpu_addr),
- qp->rq_size, EFA_MMAP_DMA_PAGE);
- if (resp->rq_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = virt_to_phys(qp->rq_cpu_addr);
+ qp->rq_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, qp->rq_size,
+ EFA_MMAP_DMA_PAGE,
+ &resp->rq_mmap_key);
+ if (!qp->rq_mmap_entry)
+ goto err_remove_mmap;
resp->rq_mmap_size = qp->rq_size;
}
return 0;
+
+err_remove_mmap:
+ efa_qp_user_mmap_entries_remove(ucontext, qp);
+
+ return -ENOMEM;
}
static int efa_qp_validate_cap(struct efa_dev *dev,
@@@ -587,6 -634,7 +587,6 @@@ struct ib_qp *efa_create_qp(struct ib_p
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_ibv_create_qp_resp resp = {};
struct efa_ibv_create_qp cmd = {};
- bool rq_entry_inserted = false;
struct efa_ucontext *ucontext;
struct efa_qp *qp;
int err;
@@@ -694,6 -742,7 +694,6 @@@
if (err)
goto err_destroy_qp;
- rq_entry_inserted = true;
qp->qp_handle = create_qp_resp.qp_handle;
qp->ibqp.qp_num = create_qp_resp.qp_num;
qp->ibqp.qp_type = init_attr->qp_type;
@@@ -710,7 -759,7 +710,7 @@@
ibdev_dbg(&dev->ibdev,
"Failed to copy udata for qp[%u]\n",
create_qp_resp.qp_num);
- goto err_destroy_qp;
+ goto err_remove_mmap_entries;
}
}
@@@ -718,16 -767,13 +718,16 @@@
return &qp->ibqp;
+err_remove_mmap_entries:
+ efa_qp_user_mmap_entries_remove(ucontext, qp);
err_destroy_qp:
efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
err_free_mapped:
if (qp->rq_size) {
dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
DMA_TO_DEVICE);
- if (!rq_entry_inserted)
+
+ if (!qp->rq_mmap_entry)
free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
}
err_free_qp:
@@@ -851,18 -897,16 +851,18 @@@ void efa_destroy_cq(struct ib_cq *ibcq
efa_destroy_cq_idx(dev, cq->cq_idx);
dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
}
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
struct efa_ibv_create_cq_resp *resp)
{
resp->q_mmap_size = cq->size;
- resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
- virt_to_phys(cq->cpu_addr),
- cq->size, EFA_MMAP_DMA_PAGE);
- if (resp->q_mmap_key == EFA_MMAP_INVALID)
+ cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+ virt_to_phys(cq->cpu_addr),
+ cq->size, EFA_MMAP_DMA_PAGE,
+ &resp->q_mmap_key);
+ if (!cq->mmap_entry)
return -ENOMEM;
return 0;
@@@ -880,6 -924,7 +880,6 @@@ int efa_create_cq(struct ib_cq *ibcq, c
struct efa_dev *dev = to_edev(ibdev);
struct efa_ibv_create_cq cmd = {};
struct efa_cq *cq = to_ecq(ibcq);
- bool cq_entry_inserted = false;
int entries = attr->cqe;
int err;
@@@ -968,13 -1013,15 +968,13 @@@
goto err_destroy_cq;
}
- cq_entry_inserted = true;
-
if (udata->outlen) {
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
if (err) {
ibdev_dbg(ibdev,
"Failed to copy udata for create_cq\n");
- goto err_destroy_cq;
+ goto err_remove_mmap;
}
}
@@@ -983,16 -1030,13 +983,16 @@@
return 0;
+err_remove_mmap:
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
err_destroy_cq:
efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
- if (!cq_entry_inserted)
+ if (!cq->mmap_entry)
free_pages_exact(cq->cpu_addr, cq->size);
+
err_out:
atomic64_inc(&dev->stats.sw_stats.create_cq_err);
return err;
@@@ -1352,7 -1396,6 +1352,7 @@@ struct ib_mr *efa_reg_mr(struct ib_pd *
struct efa_com_reg_mr_params params = {};
struct efa_com_reg_mr_result result = {};
struct pbl_context pbl;
+ int supp_access_flags;
unsigned int pg_sz;
struct efa_mr *mr;
int inline_size;
@@@ -1366,14 -1409,10 +1366,14 @@@
goto err_out;
}
- if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
+ supp_access_flags =
+ IB_ACCESS_LOCAL_WRITE |
+ (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
+
+ if (access_flags & ~supp_access_flags) {
ibdev_dbg(&dev->ibdev,
"Unsupported access flags[%#x], supported[%#x]\n",
- access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
+ access_flags, supp_access_flags);
err = -EOPNOTSUPP;
goto err_out;
}
@@@ -1384,7 -1423,7 +1384,7 @@@
goto err_out;
}
- mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+ mr->umem = ib_umem_get(udata, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
ibdev_dbg(&dev->ibdev,
@@@ -1395,7 -1434,7 +1395,7 @@@
params.pd = to_epd(ibpd)->pdn;
params.iova = virt_addr;
params.mr_length_in_bytes = length;
- params.permissions = access_flags & 0x1;
+ params.permissions = access_flags;
pg_sz = ib_umem_find_best_pgsz(mr->umem,
dev->dev_attr.page_size_cap,
@@@ -1517,6 -1556,7 +1517,6 @@@ int efa_alloc_ucontext(struct ib_uconte
goto err_out;
ucontext->uarn = result.uarn;
- xa_init(&ucontext->mmap_xa);
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
@@@ -1545,56 -1585,38 +1545,56 @@@ void efa_dealloc_ucontext(struct ib_uco
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- mmap_entries_remove_free(dev, ucontext);
efa_dealloc_uar(dev, ucontext->uarn);
}
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ /* DMA mapping is already gone, now free the pages */
+ if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
+ free_pages_exact(phys_to_virt(entry->address),
+ entry->rdma_entry.npages * PAGE_SIZE);
+ kfree(entry);
+}
+
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
- struct vm_area_struct *vma, u64 key, u64 length)
+ struct vm_area_struct *vma)
{
- struct efa_mmap_entry *entry;
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct efa_user_mmap_entry *entry;
unsigned long va;
+ int err = 0;
u64 pfn;
- int err;
- entry = mmap_entry_get(dev, ucontext, key, length);
- if (!entry) {
- ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
- key);
+ rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev,
+ "pgoff[%#lx] does not have valid entry\n",
+ vma->vm_pgoff);
return -EINVAL;
}
+ entry = to_emmap(rdma_entry);
ibdev_dbg(&dev->ibdev,
- "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
- entry->address, length, entry->mmap_flag);
+ "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag);
pfn = entry->address >> PAGE_SHIFT;
switch (entry->mmap_flag) {
case EFA_MMAP_IO_NC:
- err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
- pgprot_noncached(vma->vm_page_prot));
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
break;
case EFA_MMAP_IO_WC:
- err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
- pgprot_writecombine(vma->vm_page_prot));
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
break;
case EFA_MMAP_DMA_PAGE:
for (va = vma->vm_start; va < vma->vm_end;
@@@ -1611,13 -1633,12 +1611,13 @@@
if (err) {
ibdev_dbg(
&dev->ibdev,
- "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
- entry->address, length, entry->mmap_flag, err);
- return err;
+ "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag, err);
}
- return 0;
+ rdma_user_mmap_entry_put(rdma_entry);
+ return err;
}
int efa_mmap(struct ib_ucontext *ibucontext,
@@@ -1625,13 -1646,26 +1625,13 @@@
{
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- u64 length = vma->vm_end - vma->vm_start;
- u64 key = vma->vm_pgoff << PAGE_SHIFT;
+ size_t length = vma->vm_end - vma->vm_start;
ibdev_dbg(&dev->ibdev,
- "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
- vma->vm_start, vma->vm_end, length, key);
-
- if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
- ibdev_dbg(&dev->ibdev,
- "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
- length, PAGE_SIZE, vma->vm_flags);
- return -EINVAL;
- }
-
- if (vma->vm_flags & VM_EXEC) {
- ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
- return -EPERM;
- }
+ "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+ vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
- return __efa_mmap(dev, ucontext, vma, key, length);
+ return __efa_mmap(dev, ucontext, vma);
}
static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
diff --combined drivers/infiniband/hw/hfi1/sdma.c
index 5774dfc22e18,a772955bd2ad..09df585ecee6
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@@ -65,7 -65,6 +65,7 @@@
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
@@@ -848,7 -847,7 +848,7 @@@ static const struct rhashtable_params s
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_member(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
@@@ -881,8 -880,8 +881,8 @@@ struct sdma_engine *sdma_select_user_en
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
- sdma_rht_params);
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
struct sdma_rht_map_elem *map = rht_node->map[vl];
@@@ -1297,7 -1296,7 +1297,7 @@@ void sdma_clean(struct hfi1_devdata *dd
struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
- dma_free_coherent(&dd->pcidev->dev, 4,
+ dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
@@@ -1492,7 -1491,7 +1492,7 @@@ int sdma_init(struct hfi1_devdata *dd,
}
/* Allocate memory for pad */
- dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
+ dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
&dd->sdma_pad_phys, GFP_KERNEL);
if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
@@@ -1527,11 -1526,8 +1527,11 @@@
}
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(tmp_sdma_rht);
goto bail;
+ }
+
dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
diff --combined drivers/infiniband/hw/hfi1/verbs.h
index b0e9bf7cd150,95a0c0b73387..c673cdca5689
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@@ -107,9 -107,9 +107,9 @@@ enum
HFI1_HAS_GRH = (1 << 0),
};
- #define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh))
+ #define LRH_16B_BYTES (sizeof_member(struct hfi1_16b_header, lrh))
#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
- #define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
+ #define LRH_9B_BYTES (sizeof_member(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */
@@@ -330,8 -330,9 +330,8 @@@ void hfi1_sys_guid_chg(struct hfi1_ibpo
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
/*
* The PSN_MASK and PSN_SHIFT allow for
diff --combined drivers/md/raid5-ppl.c
index cab5b1352892,51e070a9c5e6..fb4e31f19c53
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@@ -1360,7 -1360,7 +1360,7 @@@ int ppl_init_log(struct r5conf *conf
return -EINVAL;
}
- max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) *
+ max_disks = sizeof_member(struct ppl_log, disk_flush_bitmap) *
BITS_PER_BYTE;
if (conf->raid_disks > max_disks) {
pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
@@@ -1404,7 -1404,7 +1404,7 @@@
atomic64_set(&ppl_conf->seq, 0);
INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
spin_lock_init(&ppl_conf->no_mem_stripes_lock);
- ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET;
+ ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
if (!mddev->external) {
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 7c7038676d6d,491acfe19e2a..291c1d3fc06a
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@@ -55,8 -55,6 +55,8 @@@
#define HCLGE_LINK_STATUS_MS 10
+#define HCLGE_VF_VPORT_START_NUM 1
+
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
static int hclge_init_vlan_config(struct hclge_dev *hdev);
static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
@@@ -325,7 -323,8 +325,7 @@@ static const struct hclge_mac_mgr_tbl_e
{
.flags = HCLGE_MAC_MGR_MASK_VLAN_B,
.ethter_type = cpu_to_le16(ETH_P_LLDP),
- .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)),
- .mac_addr_lo16 = cpu_to_le16(htons(0x000E)),
+ .mac_addr = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e},
.i_port_bitmap = 0x1,
},
};
@@@ -907,9 -906,6 +907,9 @@@ static int hclge_query_pf_resource(stru
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+ /* nic's msix numbers is always equals to the roce's. */
+ hdev->num_nic_msi = hdev->num_roce_msi;
+
/* PF should have NIC vectors and Roce vectors,
* NIC vectors are queued before Roce vectors.
*/
@@@ -919,15 -915,6 +919,15 @@@
hdev->num_msi =
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+ hdev->num_nic_msi = hdev->num_msi;
+ }
+
+ if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) {
+ dev_err(&hdev->pdev->dev,
+ "Just %u msi resources, not enough for pf(min:2).\n",
+ hdev->num_nic_msi);
+ return -EINVAL;
}
return 0;
@@@ -1195,35 -1182,6 +1195,35 @@@ static void hclge_parse_link_mode(struc
hclge_parse_backplane_link_mode(hdev, speed_ability);
}
+static u32 hclge_get_max_speed(u8 speed_ability)
+{
+ if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+ return HCLGE_MAC_SPEED_100G;
+
+ if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+ return HCLGE_MAC_SPEED_50G;
+
+ if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+ return HCLGE_MAC_SPEED_40G;
+
+ if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+ return HCLGE_MAC_SPEED_25G;
+
+ if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+ return HCLGE_MAC_SPEED_10G;
+
+ if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+ return HCLGE_MAC_SPEED_1G;
+
+ if (speed_ability & HCLGE_SUPPORT_100M_BIT)
+ return HCLGE_MAC_SPEED_100M;
+
+ if (speed_ability & HCLGE_SUPPORT_10M_BIT)
+ return HCLGE_MAC_SPEED_10M;
+
+ return HCLGE_MAC_SPEED_1G;
+}
+
static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
{
struct hclge_cfg_param_cmd *req;
@@@ -1394,11 -1352,9 +1394,11 @@@ static int hclge_configure(struct hclge
hclge_parse_link_mode(hdev, cfg.speed_ability);
+ hdev->hw.mac.max_speed = hclge_get_max_speed(cfg.speed_ability);
+
if ((hdev->tc_max > HNAE3_MAX_TC) ||
(hdev->tc_max < 1)) {
- dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
+ dev_warn(&hdev->pdev->dev, "TC num = %u.\n",
hdev->tc_max);
hdev->tc_max = 1;
}
@@@ -1551,10 -1507,6 +1551,10 @@@ static int hclge_assign_tqp(struct hcl
kinfo->rss_size = min_t(u16, hdev->rss_size_max,
vport->alloc_tqps / hdev->tm_info.num_tc);
+ /* ensure one to one mapping between irq and queue at default */
+ kinfo->rss_size = min_t(u16, kinfo->rss_size,
+ (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc);
+
return 0;
}
@@@ -1658,7 -1610,7 +1658,7 @@@ static int hclge_alloc_vport(struct hcl
num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
if (hdev->num_tqps < num_vport) {
- dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)",
+ dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)",
hdev->num_tqps, num_vport);
return -EINVAL;
}
@@@ -1681,7 -1633,6 +1681,7 @@@
for (i = 0; i < num_vport; i++) {
vport->back = hdev;
vport->vport_id = i;
+ vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO;
vport->mps = HCLGE_MAC_DEFAULT_FRAME;
vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
vport->rxvlan_cfg.rx_vlan_offload_en = true;
@@@ -2334,8 -2285,7 +2334,8 @@@ static int hclge_init_msi(struct hclge_
int vectors;
int i;
- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+ hdev->num_msi,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (vectors < 0) {
dev_err(&pdev->dev,
@@@ -2345,12 -2295,11 +2345,12 @@@
}
if (vectors < hdev->num_msi)
dev_warn(&hdev->pdev->dev,
- "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+ "requested %u MSI/MSI-X, but allocated %d MSI/MSI-X\n",
hdev->num_msi, vectors);
hdev->num_msi = vectors;
hdev->num_msi_left = vectors;
+
hdev->base_msi_vector = pdev->irq;
hdev->roce_base_vector = hdev->base_msi_vector +
hdev->roce_base_msix_offset;
@@@ -2777,7 -2726,7 +2777,7 @@@ static void hclge_update_port_capabilit
else if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
mac->module_type = HNAE3_MODULE_TYPE_TP;
- if (mac->support_autoneg == true) {
+ if (mac->support_autoneg) {
linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mac->supported);
linkmode_copy(mac->advertising, mac->supported);
} else {
@@@ -2904,62 -2853,6 +2904,62 @@@ static int hclge_get_status(struct hnae
return hdev->hw.mac.link;
}
+static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
+{
+ if (pci_num_vf(hdev->pdev) == 0) {
+ dev_err(&hdev->pdev->dev,
+ "SRIOV is disabled, can not get vport(%d) info.\n", vf);
+ return NULL;
+ }
+
+ if (vf < 0 || vf >= pci_num_vf(hdev->pdev)) {
+ dev_err(&hdev->pdev->dev,
+ "vf id(%d) is out of range(0 <= vfid < %d)\n",
+ vf, pci_num_vf(hdev->pdev));
+ return NULL;
+ }
+
+ /* VF start from 1 in vport */
+ vf += HCLGE_VF_VPORT_START_NUM;
+ return &hdev->vport[vf];
+}
+
+static int hclge_get_vf_config(struct hnae3_handle *handle, int vf,
+ struct ifla_vf_info *ivf)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ ivf->vf = vf;
+ ivf->linkstate = vport->vf_info.link_state;
+ ivf->spoofchk = vport->vf_info.spoofchk;
+ ivf->trusted = vport->vf_info.trusted;
+ ivf->min_tx_rate = 0;
+ ivf->max_tx_rate = vport->vf_info.max_tx_rate;
+ ether_addr_copy(ivf->mac, vport->vf_info.mac);
+
+ return 0;
+}
+
+static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
+ int link_state)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ vport->vf_info.link_state = link_state;
+
+ return 0;
+}
+
static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
{
u32 rst_src_reg, cmdq_src_reg, msix_src_reg;
@@@ -3280,7 -3173,7 +3280,7 @@@ static int hclge_reset_wait(struct hclg
if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
dev_err(&hdev->pdev->dev,
- "flr wait timeout: %d\n", cnt);
+ "flr wait timeout: %u\n", cnt);
return -EBUSY;
}
@@@ -3330,7 -3223,7 +3330,7 @@@ static int hclge_set_all_vf_rst(struct
ret = hclge_set_vf_rst(hdev, vport->vport_id, reset);
if (ret) {
dev_err(&hdev->pdev->dev,
- "set vf(%d) rst failed %d!\n",
+ "set vf(%u) rst failed %d!\n",
vport->vport_id, ret);
return ret;
}
@@@ -3345,7 -3238,7 +3345,7 @@@
ret = hclge_inform_reset_assert_to_vf(vport);
if (ret)
dev_warn(&hdev->pdev->dev,
- "inform reset to vf(%d) failed %d!\n",
+ "inform reset to vf(%u) failed %d!\n",
vport->vport_id, ret);
}
@@@ -3658,7 -3551,7 +3658,7 @@@ static bool hclge_reset_err_handle(stru
hdev->rst_stats.reset_fail_cnt++;
set_bit(hdev->reset_type, &hdev->reset_pending);
dev_info(&hdev->pdev->dev,
- "re-schedule reset task(%d)\n",
+ "re-schedule reset task(%u)\n",
hdev->rst_stats.reset_fail_cnt);
return true;
}
@@@ -3669,9 -3562,6 +3669,9 @@@
hclge_reset_handshake(hdev, true);
dev_err(&hdev->pdev->dev, "Reset fail!\n");
+
+ hclge_dbg_dump_rst_info(hdev);
+
return false;
}
@@@ -3679,28 -3569,12 +3679,28 @@@ static int hclge_set_rst_done(struct hc
{
struct hclge_pf_rst_done_cmd *req;
struct hclge_desc desc;
+ int ret;
req = (struct hclge_pf_rst_done_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PF_RST_DONE, false);
req->pf_rst_done |= HCLGE_PF_RESET_DONE_BIT;
- return hclge_cmd_send(&hdev->hw, &desc, 1);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ /* To be compatible with the old firmware, which does not support
+ * command HCLGE_OPC_PF_RST_DONE, just print a warning and
+ * return success
+ */
+ if (ret == -EOPNOTSUPP) {
+ dev_warn(&hdev->pdev->dev,
+ "current firmware does not support command(0x%x)!\n",
+ HCLGE_OPC_PF_RST_DONE);
+ return 0;
+ } else if (ret) {
+ dev_err(&hdev->pdev->dev, "assert PF reset done fail %d!\n",
+ ret);
+ }
+
+ return ret;
}
static int hclge_reset_prepare_up(struct hclge_dev *hdev)
@@@ -3871,13 -3745,12 +3871,13 @@@ static void hclge_reset_event(struct pc
HCLGE_RESET_INTERVAL))) {
mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
return;
- } else if (hdev->default_reset_request)
+ } else if (hdev->default_reset_request) {
hdev->reset_level =
hclge_get_reset_level(ae_dev,
&hdev->default_reset_request);
- else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ)))
+ } else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) {
hdev->reset_level = HNAE3_FUNC_RESET;
+ }
dev_info(&hdev->pdev->dev, "received reset event, reset type is %d\n",
hdev->reset_level);
@@@ -4002,7 -3875,6 +4002,7 @@@ static void hclge_service_task(struct w
hclge_update_link_status(hdev);
hclge_update_vport_alive(hdev);
hclge_sync_vlan_filter(hdev);
+
if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
hclge_rfs_filter_expire(hdev);
hdev->fd_arfs_expire_timer = 0;
@@@ -4031,7 -3903,6 +4031,7 @@@ static int hclge_get_vector(struct hnae
int alloc = 0;
int i, j;
+ vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num);
vector_num = min(hdev->num_msi_left, vector_num);
for (j = 0; j < vector_num; j++) {
@@@ -4509,7 -4380,7 +4509,7 @@@ int hclge_rss_init_hw(struct hclge_dev
*/
if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) {
dev_err(&hdev->pdev->dev,
- "Configure rss tc size failed, invalid TC_SIZE = %d\n",
+ "Configure rss tc size failed, invalid TC_SIZE = %u\n",
rss_size);
return -EINVAL;
}
@@@ -4687,8 -4558,8 +4687,8 @@@ static int hclge_unmap_ring_frm_vector(
return ret;
}
-int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
- struct hclge_promisc_param *param)
+static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+ struct hclge_promisc_param *param)
{
struct hclge_promisc_cfg_cmd *req;
struct hclge_desc desc;
@@@ -4715,9 -4586,8 +4715,9 @@@
return ret;
}
-void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
- bool en_mc, bool en_bc, int vport_id)
+static void hclge_promisc_param_init(struct hclge_promisc_param *param,
+ bool en_uc, bool en_mc, bool en_bc,
+ int vport_id)
{
if (!param)
return;
@@@ -4732,21 -4602,12 +4732,21 @@@
param->vf_id = vport_id;
}
+int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
+ bool en_mc_pmc, bool en_bc_pmc)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_promisc_param param;
+
+ hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc,
+ vport->vport_id);
+ return hclge_cmd_set_promisc_mode(hdev, ¶m);
+}
+
static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
bool en_mc_pmc)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
- struct hclge_promisc_param param;
bool en_bc_pmc = true;
/* For revision 0x20, if broadcast promisc enabled, vlan filter is
@@@ -4756,8 -4617,9 +4756,8 @@@
if (handle->pdev->revision == 0x20)
en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false;
- hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc,
- vport->vport_id);
- return hclge_cmd_set_promisc_mode(hdev, ¶m);
+ return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc,
+ en_bc_pmc);
}
static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
@@@ -4859,7 -4721,7 +4859,7 @@@ static int hclge_init_fd_config(struct
break;
default:
dev_err(&hdev->pdev->dev,
- "Unsupported flow director mode %d\n",
+ "Unsupported flow director mode %u\n",
hdev->fd_cfg.fd_mode);
return -EOPNOTSUPP;
}
@@@ -5189,7 -5051,7 +5189,7 @@@ static int hclge_config_key(struct hclg
true);
if (ret) {
dev_err(&hdev->pdev->dev,
- "fd key_y config fail, loc=%d, ret=%d\n",
+ "fd key_y config fail, loc=%u, ret=%d\n",
rule->queue_id, ret);
return ret;
}
@@@ -5198,7 -5060,7 +5198,7 @@@
true);
if (ret)
dev_err(&hdev->pdev->dev,
- "fd key_x config fail, loc=%d, ret=%d\n",
+ "fd key_x config fail, loc=%u, ret=%d\n",
rule->queue_id, ret);
return ret;
}
@@@ -5447,7 -5309,7 +5447,7 @@@ static int hclge_fd_update_rule_list(st
}
} else if (!is_add) {
dev_err(&hdev->pdev->dev,
- "delete fail, rule %d is inexistent\n",
+ "delete fail, rule %u is inexistent\n",
location);
return -EINVAL;
}
@@@ -5687,7 -5549,7 +5687,7 @@@ static int hclge_add_fd_entry(struct hn
if (vf > hdev->num_req_vfs) {
dev_err(&hdev->pdev->dev,
- "Error: vf id (%d) > max vf num (%d)\n",
+ "Error: vf id (%u) > max vf num (%u)\n",
vf, hdev->num_req_vfs);
return -EINVAL;
}
@@@ -5697,7 -5559,7 +5697,7 @@@
if (ring >= tqps) {
dev_err(&hdev->pdev->dev,
- "Error: queue id (%d) > max tqp num (%d)\n",
+ "Error: queue id (%u) > max tqp num (%u)\n",
ring, tqps - 1);
return -EINVAL;
}
@@@ -5756,7 -5618,7 +5756,7 @@@ static int hclge_del_fd_entry(struct hn
if (!hclge_fd_rule_exist(hdev, fs->location)) {
dev_err(&hdev->pdev->dev,
- "Delete fail, rule %d is inexistent\n", fs->location);
+ "Delete fail, rule %u is inexistent\n", fs->location);
return -ENOENT;
}
@@@ -5833,7 -5695,7 +5833,7 @@@ static int hclge_restore_fd_entries(str
if (ret) {
dev_warn(&hdev->pdev->dev,
- "Restore rule %d failed, remove it\n",
+ "Restore rule %u failed, remove it\n",
rule->location);
clear_bit(rule->location, hdev->fd_bmap);
hlist_del(&rule->rule_node);
@@@ -6366,23 -6228,11 +6366,23 @@@ static int hclge_config_switch_param(st
func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0);
req = (struct hclge_mac_vlan_switch_cmd *)desc.data;
+
+ /* read current config parameter */
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM,
- false);
+ true);
req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL;
req->func_id = cpu_to_le32(func_id);
- req->switch_param = switch_param;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "read mac vlan switch parameter fail, ret = %d\n", ret);
+ return ret;
+ }
+
+ /* modify and write new config parameter */
+ hclge_cmd_reuse_desc(&desc, false);
+ req->switch_param = (req->switch_param & param_mask) | switch_param;
req->param_mask = param_mask;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@@ -6838,7 -6688,7 +6838,7 @@@ static int hclge_get_mac_vlan_cmd_statu
if (cmdq_resp) {
dev_err(&hdev->pdev->dev,
- "cmdq execute failed for get_mac_vlan_cmd_status,status=%d.\n",
+ "cmdq execute failed for get_mac_vlan_cmd_status,status=%u.\n",
cmdq_resp);
return -EIO;
}
@@@ -7090,7 -6940,7 +7090,7 @@@ static int hclge_init_umv_space(struct
if (allocated_size < hdev->wanted_umv_size)
dev_warn(&hdev->pdev->dev,
- "Alloc umv space failed, want %d, get %d\n",
+ "Alloc umv space failed, want %u, get %u\n",
hdev->wanted_umv_size, allocated_size);
mutex_init(&hdev->umv_mutex);
@@@ -7258,7 -7108,7 +7258,7 @@@ int hclge_add_uc_addr_common(struct hcl
/* check if we just hit the duplicate */
if (!ret) {
- dev_warn(&hdev->pdev->dev, "VF %d mac(%pM) exists\n",
+ dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n",
vport->vport_id, addr);
return 0;
}
@@@ -7439,7 -7289,7 +7439,7 @@@ void hclge_rm_vport_mac_table(struct hc
mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC;
list_for_each_entry_safe(mac_cfg, tmp, list, node) {
- if (strncmp(mac_cfg->mac_addr, mac_addr, ETH_ALEN) == 0) {
+ if (ether_addr_equal(mac_cfg->mac_addr, mac_addr)) {
if (uc_flag && mac_cfg->hd_tbl_status)
hclge_rm_uc_addr_common(vport, mac_addr);
@@@ -7511,7 -7361,7 +7511,7 @@@ static int hclge_get_mac_ethertype_cmd_
if (cmdq_resp) {
dev_err(&hdev->pdev->dev,
- "cmdq execute failed for get_mac_ethertype_cmd_status, status=%d.\n",
+ "cmdq execute failed for get_mac_ethertype_cmd_status, status=%u.\n",
cmdq_resp);
return -EIO;
}
@@@ -7533,7 -7383,7 +7533,7 @@@
break;
default:
dev_err(&hdev->pdev->dev,
- "add mac ethertype failed for undefined, code=%d.\n",
+ "add mac ethertype failed for undefined, code=%u.\n",
resp_code);
return_status = -EIO;
}
@@@ -7541,67 -7391,6 +7541,67 @@@
return return_status;
}
+static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx,
+ u8 *mac_addr)
+{
+ struct hclge_mac_vlan_tbl_entry_cmd req;
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_desc desc;
+ u16 egress_port = 0;
+ int i;
+
+ if (is_zero_ether_addr(mac_addr))
+ return false;
+
+ memset(&req, 0, sizeof(req));
+ hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
+ HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
+ req.egress_port = cpu_to_le16(egress_port);
+ hclge_prepare_mac_addr(&req, mac_addr, false);
+
+ if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT)
+ return true;
+
+ vf_idx += HCLGE_VF_VPORT_START_NUM;
+ for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++)
+ if (i != vf_idx &&
+ ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac))
+ return true;
+
+ return false;
+}
+
+static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
+ u8 *mac_addr)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ if (ether_addr_equal(mac_addr, vport->vf_info.mac)) {
+ dev_info(&hdev->pdev->dev,
+ "Specified MAC(=%pM) is same as before, no change committed!\n",
+ mac_addr);
+ return 0;
+ }
+
+ if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) {
+ dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n",
+ mac_addr);
+ return -EEXIST;
+ }
+
+ ether_addr_copy(vport->vf_info.mac, mac_addr);
+ dev_info(&hdev->pdev->dev,
+ "MAC of VF %d has been set to %pM, and it will be reinitialized!\n",
+ vf, mac_addr);
+
+ return hclge_inform_reset_assert_to_vf(vport);
+}
+
static int hclge_add_mgr_tbl(struct hclge_dev *hdev,
const struct hclge_mac_mgr_tbl_entry_cmd *req)
{
@@@ -7774,7 -7563,7 +7774,7 @@@ static int hclge_set_vf_vlan_common(str
bool is_kill, u16 vlan,
__be16 proto)
{
-#define HCLGE_MAX_VF_BYTES 16
+ struct hclge_vport *vport = &hdev->vport[vfid];
struct hclge_vlan_filter_vf_cfg_cmd *req0;
struct hclge_vlan_filter_vf_cfg_cmd *req1;
struct hclge_desc desc[2];
@@@ -7783,18 -7572,10 +7783,18 @@@
int ret;
/* if vf vlan table is full, firmware will close vf vlan filter, it
- * is unable and unnecessary to add new vlan id to vf vlan filter
+ * is unable and unnecessary to add new vlan id to vf vlan filter.
+ * If spoof check is enable, and vf vlan is full, it shouldn't add
+ * new vlan, because tx packets with these vlan id will be dropped.
*/
- if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill)
+ if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) {
+ if (vport->vf_info.spoofchk && vlan) {
+ dev_err(&hdev->pdev->dev,
+ "Can't add vlan due to spoof check is on and vf vlan table is full\n");
+ return -EPERM;
+ }
return 0;
+ }
hclge_cmd_setup_basic_desc(&desc[0],
HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
@@@ -7838,7 -7619,7 +7838,7 @@@
}
dev_err(&hdev->pdev->dev,
- "Add vf vlan filter fail, ret =%d.\n",
+ "Add vf vlan filter fail, ret =%u.\n",
req0->resp_code);
} else {
#define HCLGE_VF_VLAN_DEL_NO_FOUND 1
@@@ -7854,7 -7635,7 +7854,7 @@@
return 0;
dev_err(&hdev->pdev->dev,
- "Kill vf vlan filter fail, ret =%d.\n",
+ "Kill vf vlan filter fail, ret =%u.\n",
req0->resp_code);
}
@@@ -7873,10 -7654,9 +7873,10 @@@ static int hclge_set_port_vlan_filter(s
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_PF_CFG, false);
- vlan_offset_160 = vlan_id / 160;
- vlan_offset_byte = (vlan_id % 160) / 8;
- vlan_offset_byte_val = 1 << (vlan_id % 8);
+ vlan_offset_160 = vlan_id / HCLGE_VLAN_ID_OFFSET_STEP;
+ vlan_offset_byte = (vlan_id % HCLGE_VLAN_ID_OFFSET_STEP) /
+ HCLGE_VLAN_BYTE_SIZE;
+ vlan_offset_byte_val = 1 << (vlan_id % HCLGE_VLAN_BYTE_SIZE);
req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data;
req->vlan_offset = vlan_offset_160;
@@@ -7904,7 -7684,7 +7904,7 @@@ static int hclge_set_vlan_filter_hw(str
proto);
if (ret) {
dev_err(&hdev->pdev->dev,
- "Set %d vport vlan filter config fail, ret =%d.\n",
+ "Set %u vport vlan filter config fail, ret =%d.\n",
vport_id, ret);
return ret;
}
@@@ -7916,7 -7696,7 +7916,7 @@@
if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) {
dev_err(&hdev->pdev->dev,
- "Add port vlan failed, vport %d is already in vlan %d\n",
+ "Add port vlan failed, vport %u is already in vlan %u\n",
vport_id, vlan_id);
return -EINVAL;
}
@@@ -7924,7 -7704,7 +7924,7 @@@
if (is_kill &&
!test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) {
dev_err(&hdev->pdev->dev,
- "Delete port vlan failed, vport %d is not in vlan %d\n",
+ "Delete port vlan failed, vport %u is not in vlan %u\n",
vport_id, vlan_id);
return -EINVAL;
}
@@@ -8292,15 -8072,12 +8292,15 @@@ static void hclge_restore_vlan_table(st
}
list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
- if (vlan->hd_tbl_status)
- hclge_set_vlan_filter_hw(hdev,
- htons(ETH_P_8021Q),
- vport->vport_id,
- vlan->vlan_id,
- false);
+ int ret;
+
+ if (!vlan->hd_tbl_status)
+ continue;
+ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+ vport->vport_id,
+ vlan->vlan_id, false);
+ if (ret)
+ break;
}
}
@@@ -8580,7 -8357,6 +8580,7 @@@ int hclge_set_vport_mtu(struct hclge_vp
struct hclge_dev *hdev = vport->back;
int i, max_frm_size, ret;
+ /* HW supprt 2 layer vlan */
max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
max_frm_size > HCLGE_MAC_MAX_FRAME)
@@@ -8996,16 -8772,16 +8996,16 @@@ static void hclge_info_show(struct hclg
dev_info(dev, "PF info begin:\n");
- dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
- dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
- dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
- dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
- dev_info(dev, "Numbers of vmdp vports: %d\n", hdev->num_vmdq_vport);
- dev_info(dev, "Numbers of VF for this PF: %d\n", hdev->num_req_vfs);
- dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
- dev_info(dev, "Total buffer size for TX/RX: %d\n", hdev->pkt_buf_size);
- dev_info(dev, "TX buffer size for each TC: %d\n", hdev->tx_buf_size);
- dev_info(dev, "DV buffer size for each TC: %d\n", hdev->dv_buf_size);
+ dev_info(dev, "Task queue pairs numbers: %u\n", hdev->num_tqps);
+ dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
+ dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
+ dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
+ dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport);
+ dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs);
+ dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
+ dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size);
+ dev_info(dev, "TX buffer size for each TC: %u\n", hdev->tx_buf_size);
+ dev_info(dev, "DV buffer size for each TC: %u\n", hdev->dv_buf_size);
dev_info(dev, "This is %s PF\n",
hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
dev_info(dev, "DCB %s\n",
@@@ -9021,9 -8797,10 +9021,9 @@@ static int hclge_init_nic_client_instan
{
struct hnae3_client *client = vport->nic.client;
struct hclge_dev *hdev = ae_dev->priv;
- int rst_cnt;
+ int rst_cnt = hdev->rst_stats.reset_cnt;
int ret;
- rst_cnt = hdev->rst_stats.reset_cnt;
ret = client->ops->init_instance(&vport->nic);
if (ret)
return ret;
@@@ -9123,6 -8900,7 +9123,6 @@@ static int hclge_init_client_instance(s
switch (client->type) {
case HNAE3_CLIENT_KNIC:
-
hdev->nic_client = client;
vport->nic.client = client;
ret = hclge_init_nic_client_instance(ae_dev, vport);
@@@ -9321,7 -9099,7 +9321,7 @@@ static void hclge_clear_resetting_state
ret = hclge_set_vf_rst(hdev, vport->vport_id, false);
if (ret)
dev_warn(&hdev->pdev->dev,
- "clear vf(%d) rst failed %d!\n",
+ "clear vf(%u) rst failed %d!\n",
vport->vport_id, ret);
}
}
@@@ -9343,8 -9121,6 +9343,8 @@@ static int hclge_init_ae_dev(struct hna
hdev->reset_type = HNAE3_NONE_RESET;
hdev->reset_level = HNAE3_FUNC_RESET;
ae_dev->priv = hdev;
+
+ /* HW supprt 2 layer vlan */
hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
mutex_init(&hdev->vport_lock);
@@@ -9543,219 -9319,6 +9543,219 @@@ static void hclge_stats_clear(struct hc
memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats));
}
+static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
+{
+ return hclge_config_switch_param(hdev, vf, enable,
+ HCLGE_SWITCH_ANTI_SPOOF_MASK);
+}
+
+static int hclge_set_vlan_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
+{
+ return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+ HCLGE_FILTER_FE_NIC_INGRESS_B,
+ enable, vf);
+}
+
+static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable)
+{
+ int ret;
+
+ ret = hclge_set_mac_spoofchk(hdev, vf, enable);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Set vf %d mac spoof check %s failed, ret=%d\n",
+ vf, enable ? "on" : "off", ret);
+ return ret;
+ }
+
+ ret = hclge_set_vlan_spoofchk(hdev, vf, enable);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Set vf %d vlan spoof check %s failed, ret=%d\n",
+ vf, enable ? "on" : "off", ret);
+
+ return ret;
+}
+
+static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf,
+ bool enable)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u32 new_spoofchk = enable ? 1 : 0;
+ int ret;
+
+ if (hdev->pdev->revision == 0x20)
+ return -EOPNOTSUPP;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ if (vport->vf_info.spoofchk == new_spoofchk)
+ return 0;
+
+ if (enable && test_bit(vport->vport_id, hdev->vf_vlan_full))
+ dev_warn(&hdev->pdev->dev,
+ "vf %d vlan table is full, enable spoof check may cause its packet send fail\n",
+ vf);
+ else if (enable && hclge_is_umv_space_full(vport))
+ dev_warn(&hdev->pdev->dev,
+ "vf %d mac table is full, enable spoof check may cause its packet send fail\n",
+ vf);
+
+ ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, enable);
+ if (ret)
+ return ret;
+
+ vport->vf_info.spoofchk = new_spoofchk;
+ return 0;
+}
+
+static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int ret;
+ int i;
+
+ if (hdev->pdev->revision == 0x20)
+ return 0;
+
+ /* resume the vf spoof check state after reset */
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id,
+ vport->vf_info.spoofchk);
+ if (ret)
+ return ret;
+
+ vport++;
+ }
+
+ return 0;
+}
+
+static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u32 new_trusted = enable ? 1 : 0;
+ bool en_bc_pmc;
+ int ret;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ if (vport->vf_info.trusted == new_trusted)
+ return 0;
+
+ /* Disable promisc mode for VF if it is not trusted any more. */
+ if (!enable && vport->vf_info.promisc_enable) {
+ en_bc_pmc = hdev->pdev->revision != 0x20;
+ ret = hclge_set_vport_promisc_mode(vport, false, false,
+ en_bc_pmc);
+ if (ret)
+ return ret;
+ vport->vf_info.promisc_enable = 0;
+ hclge_inform_vf_promisc_info(vport);
+ }
+
+ vport->vf_info.trusted = new_trusted;
+
+ return 0;
+}
+
+static void hclge_reset_vf_rate(struct hclge_dev *hdev)
+{
+ int ret;
+ int vf;
+
+ /* reset vf rate to default value */
+ for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) {
+ struct hclge_vport *vport = &hdev->vport[vf];
+
+ vport->vf_info.max_tx_rate = 0;
+ ret = hclge_tm_qs_shaper_cfg(vport, vport->vf_info.max_tx_rate);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "vf%d failed to reset to default, ret=%d\n",
+ vf - HCLGE_VF_VPORT_START_NUM, ret);
+ }
+}
+
+static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
+ int min_tx_rate, int max_tx_rate)
+{
+ if (min_tx_rate != 0 ||
+ max_tx_rate < 0 || max_tx_rate > hdev->hw.mac.max_speed) {
+ dev_err(&hdev->pdev->dev,
+ "min_tx_rate:%d [0], max_tx_rate:%d [0, %u]\n",
+ min_tx_rate, max_tx_rate, hdev->hw.mac.max_speed);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf,
+ int min_tx_rate, int max_tx_rate, bool force)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
+ if (ret)
+ return ret;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ if (!force && max_tx_rate == vport->vf_info.max_tx_rate)
+ return 0;
+
+ ret = hclge_tm_qs_shaper_cfg(vport, max_tx_rate);
+ if (ret)
+ return ret;
+
+ vport->vf_info.max_tx_rate = max_tx_rate;
+
+ return 0;
+}
+
+static int hclge_resume_vf_rate(struct hclge_dev *hdev)
+{
+ struct hnae3_handle *handle = &hdev->vport->nic;
+ struct hclge_vport *vport;
+ int ret;
+ int vf;
+
+ /* resume the vf max_tx_rate after reset */
+ for (vf = 0; vf < pci_num_vf(hdev->pdev); vf++) {
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
+ /* zero means max rate, after reset, firmware already set it to
+ * max rate, so just continue.
+ */
+ if (!vport->vf_info.max_tx_rate)
+ continue;
+
+ ret = hclge_set_vf_rate(handle, vf, 0,
+ vport->vf_info.max_tx_rate, true);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "vf%d failed to resume tx_rate:%u, ret=%d\n",
+ vf, vport->vf_info.max_tx_rate, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void hclge_reset_vport_state(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
@@@ -9833,9 -9396,6 +9833,9 @@@ static int hclge_reset_ae_dev(struct hn
return ret;
}
+ /* Log and clear the hw errors those already occurred */
+ hclge_handle_all_hns_hw_errors(ae_dev);
+
/* Re-enable the hw error interrupts because
* the interrupts get disabled on global reset.
*/
@@@ -9858,13 -9418,6 +9858,13 @@@
}
hclge_reset_vport_state(hdev);
+ ret = hclge_reset_vport_spoofchk(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_resume_vf_rate(hdev);
+ if (ret)
+ return ret;
dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n",
HCLGE_DRIVER_NAME);
@@@ -9877,7 -9430,6 +9877,7 @@@ static void hclge_uninit_ae_dev(struct
struct hclge_dev *hdev = ae_dev->priv;
struct hclge_mac *mac = &hdev->hw.mac;
+ hclge_reset_vf_rate(hdev);
hclge_misc_affinity_teardown(hdev);
hclge_state_uninit(hdev);
@@@ -9942,8 -9494,8 +9942,8 @@@ static int hclge_set_channels(struct hn
u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
struct hclge_dev *hdev = vport->back;
u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
- int cur_rss_size = kinfo->rss_size;
- int cur_tqps = kinfo->num_tqps;
+ u16 cur_rss_size = kinfo->rss_size;
+ u16 cur_tqps = kinfo->num_tqps;
u16 tc_valid[HCLGE_MAX_TC_NUM];
u16 roundup_size;
u32 *rss_indir;
@@@ -9997,7 -9549,7 +9997,7 @@@
out:
if (!ret)
dev_info(&hdev->pdev->dev,
- "Channels changed, rss_size from %d to %d, tqps from %d to %d",
+ "Channels changed, rss_size from %u to %u, tqps from %u to %u",
cur_rss_size, kinfo->rss_size,
cur_tqps, kinfo->rss_size * kinfo->num_tc);
@@@ -10246,7 -9798,7 +10246,7 @@@ static int hclge_get_dfx_reg_len(struc
return ret;
}
- data_len_per_desc = FIELD_SIZEOF(struct hclge_desc, data);
+ data_len_per_desc = sizeof_member(struct hclge_desc, data);
*len = 0;
for (i = 0; i < dfx_reg_type_num; i++) {
bd_num = bd_num_list[i];
@@@ -10600,12 -10152,6 +10600,12 @@@ static const struct hnae3_ae_ops hclge_
.mac_connect_phy = hclge_mac_connect_phy,
.mac_disconnect_phy = hclge_mac_disconnect_phy,
.restore_vlan_table = hclge_restore_vlan_table,
+ .get_vf_config = hclge_get_vf_config,
+ .set_vf_link_state = hclge_set_vf_link_state,
+ .set_vf_spoofchk = hclge_set_vf_spoofchk,
+ .set_vf_trust = hclge_set_vf_trust,
+ .set_vf_rate = hclge_set_vf_rate,
+ .set_vf_mac = hclge_set_vf_mac,
};
static struct hnae3_ae_algo ae_algo = {
diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index fbc39a2480d0,a0f08f94b12b..a1fe95b1a476
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@@ -46,7 -46,7 +46,7 @@@ static int hclge_shaper_para_calc(u32 i
#define DIVISOR_CLK (1000 * 8)
#define DIVISOR_IR_B_126 (126 * DIVISOR_CLK)
- const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
+ static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
6 * 256, /* Prioriy level */
6 * 32, /* Prioriy group level */
6 * 8, /* Port level */
@@@ -511,49 -511,6 +511,49 @@@ static int hclge_tm_qs_bp_cfg(struct hc
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate)
+{
+ struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ struct hclge_qs_shapping_cmd *shap_cfg_cmd;
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_desc desc;
+ u8 ir_b, ir_u, ir_s;
+ u32 shaper_para;
+ int ret, i;
+
+ if (!max_tx_rate)
+ max_tx_rate = HCLGE_ETHER_MAX_RATE;
+
+ ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
+ shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+
+ for (i = 0; i < kinfo->num_tc; i++) {
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG,
+ false);
+
+ shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
+ shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i);
+ shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "vf%u, qs%u failed to set tx_rate:%d, ret=%d\n",
+ vport->vport_id, shap_cfg_cmd->qs_id,
+ max_tx_rate, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
{
struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
@@@ -575,21 -532,14 +575,21 @@@
/* Set to user value, no larger than max_rss_size. */
if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size &&
kinfo->req_rss_size <= max_rss_size) {
- dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
+ dev_info(&hdev->pdev->dev, "rss changes from %u to %u\n",
kinfo->rss_size, kinfo->req_rss_size);
kinfo->rss_size = kinfo->req_rss_size;
} else if (kinfo->rss_size > max_rss_size ||
(!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
+ /* if user not set rss, the rss_size should compare with the
+ * valid msi numbers to ensure one to one map between tqp and
+ * irq as default.
+ */
+ if (!kinfo->req_rss_size)
+ max_rss_size = min_t(u16, max_rss_size,
+ (hdev->num_nic_msi - 1) /
+ kinfo->num_tc);
+
/* Set to the maximum specification value (max_rss_size). */
- dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
- kinfo->rss_size, max_rss_size);
kinfo->rss_size = max_rss_size;
}
@@@ -614,7 -564,7 +614,7 @@@
}
memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc,
- FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc));
+ sizeof_member(struct hnae3_knic_private_info, prio_tc));
}
static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index d24d8731bef0,8b0c60d754e1..990e29ff7ac4
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@@ -43,7 -43,7 +43,7 @@@ struct i40e_stats
*/
#define I40E_STAT(_type, _name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .sizeof_stat = sizeof_member(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
@@@ -722,14 -722,7 +722,14 @@@ static void i40e_get_settings_link_up_f
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
- if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
+ if ((I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) &&
+ (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info)) {
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ FEC_NONE);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ FEC_BASER);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+ } else if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
} else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) {
ethtool_link_ksettings_add_link_mode(ks, advertising,
@@@ -737,6 -730,12 +737,6 @@@
} else {
ethtool_link_ksettings_add_link_mode(ks, advertising,
FEC_NONE);
- if (I40E_AQ_SET_FEC_AUTO & req_fec_info) {
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- FEC_RS);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- FEC_BASER);
- }
}
}
@@@ -1438,7 -1437,6 +1438,7 @@@ static int i40e_get_fec_param(struct ne
struct i40e_hw *hw = &pf->hw;
i40e_status status = 0;
int err = 0;
+ u8 fec_cfg;
/* Get the current phy config */
memset(&abilities, 0, sizeof(abilities));
@@@ -1450,16 -1448,18 +1450,16 @@@
}
fecparam->fec = 0;
- if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO)
+ fec_cfg = abilities.fec_cfg_curr_mod_ext_info;
+ if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
fecparam->fec |= ETHTOOL_FEC_AUTO;
- if ((abilities.fec_cfg_curr_mod_ext_info &
- I40E_AQ_SET_FEC_REQUEST_RS) ||
- (abilities.fec_cfg_curr_mod_ext_info &
- I40E_AQ_SET_FEC_ABILITY_RS))
+ else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS |
+ I40E_AQ_SET_FEC_ABILITY_RS))
fecparam->fec |= ETHTOOL_FEC_RS;
- if ((abilities.fec_cfg_curr_mod_ext_info &
- I40E_AQ_SET_FEC_REQUEST_KR) ||
- (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR))
+ else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR |
+ I40E_AQ_SET_FEC_ABILITY_KR))
fecparam->fec |= ETHTOOL_FEC_BASER;
- if (abilities.fec_cfg_curr_mod_ext_info == 0)
+ if (fec_cfg == 0)
fecparam->fec |= ETHTOOL_FEC_OFF;
if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA)
@@@ -5112,7 -5112,7 +5112,7 @@@ static int i40e_get_module_info(struct
case I40E_MODULE_TYPE_SFP:
status = i40e_aq_get_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
- I40E_I2C_EEPROM_DEV_ADDR,
+ I40E_I2C_EEPROM_DEV_ADDR, true,
I40E_MODULE_SFF_8472_COMP,
&sff8472_comp, NULL);
if (status)
@@@ -5120,7 -5120,7 +5120,7 @@@
status = i40e_aq_get_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
- I40E_I2C_EEPROM_DEV_ADDR,
+ I40E_I2C_EEPROM_DEV_ADDR, true,
I40E_MODULE_SFF_8472_SWAP,
&sff8472_swap, NULL);
if (status)
@@@ -5152,7 -5152,7 +5152,7 @@@
/* Read from memory page 0. */
status = i40e_aq_get_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
- 0,
+ 0, true,
I40E_MODULE_REVISION_ADDR,
&sff8636_rev, NULL);
if (status)
@@@ -5223,7 -5223,7 +5223,7 @@@ static int i40e_get_module_eeprom(struc
status = i40e_aq_get_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
- addr, offset, &value, NULL);
+ true, addr, offset, &value, NULL);
if (status)
return -EIO;
data[i] = value;
@@@ -5242,7 -5242,6 +5242,7 @@@ static int i40e_set_eee(struct net_devi
}
static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = {
+ .get_drvinfo = i40e_get_drvinfo,
.set_eeprom = i40e_set_eeprom,
.get_eeprom_len = i40e_get_eeprom_len,
.get_eeprom = i40e_get_eeprom,
diff --combined drivers/net/ethernet/intel/ice/ice_ethtool.c
index aec3c6c379df,cde113e7de41..fbbb7c826e66
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@@ -15,7 -15,7 +15,7 @@@ struct ice_stats
#define ICE_STAT(_type, _name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .sizeof_stat = sizeof_member(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
@@@ -36,10 -36,10 +36,10 @@@ static int ice_q_stats_len(struct net_d
#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
#define ICE_PFC_STATS_LEN ( \
- (FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_rx) + \
- FIELD_SIZEOF(struct ice_pf, stats.priority_xon_rx) + \
- FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_tx) + \
- FIELD_SIZEOF(struct ice_pf, stats.priority_xon_tx)) \
+ (sizeof_member(struct ice_pf, stats.priority_xoff_rx) + \
+ sizeof_member(struct ice_pf, stats.priority_xon_rx) + \
+ sizeof_member(struct ice_pf, stats.priority_xoff_tx) + \
+ sizeof_member(struct ice_pf, stats.priority_xon_tx)) \
/ sizeof(u64))
#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
ICE_VSI_STATS_LEN + ice_q_stats_len(n))
@@@ -156,7 -156,6 +156,7 @@@ struct ice_priv_flag
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+ ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
@@@ -248,7 -247,7 +248,7 @@@ ice_get_eeprom(struct net_device *netde
int ret = 0;
u16 *buf;
- dev = &pf->pdev->dev;
+ dev = ice_pf_to_dev(pf);
eeprom->magic = hw->vendor_id | (hw->device_id << 16);
@@@ -343,7 -342,6 +343,7 @@@ static u64 ice_eeprom_test(struct net_d
static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
{
struct ice_pf *pf = (struct ice_pf *)hw->back;
+ struct device *dev = ice_pf_to_dev(pf);
static const u32 patterns[] = {
0x5A5A5A5A, 0xA5A5A5A5,
0x00000000, 0xFFFFFFFF
@@@ -359,7 -357,7 +359,7 @@@
val = rd32(hw, reg);
if (val == pattern)
continue;
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
, __func__, reg, pattern, val);
return 1;
@@@ -368,7 -366,7 +368,7 @@@
wr32(hw, reg, orig_val);
val = rd32(hw, reg);
if (val != orig_val) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
, __func__, reg, orig_val, val);
return 1;
@@@ -508,7 -506,7 +508,7 @@@ static int ice_lbtest_create_frame(stru
if (!pf)
return -EINVAL;
- data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+ data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
if (!data)
return -ENOMEM;
@@@ -625,7 -623,7 +625,7 @@@ static int ice_lbtest_receive_frames(st
continue;
rx_buf = &rx_ring->rx_buf[i];
- received_buf = page_address(rx_buf->page);
+ received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
if (ice_lbtest_check_frame(received_buf))
valid_frames++;
@@@ -650,11 -648,9 +650,11 @@@ static u64 ice_loopback_test(struct net
u8 broadcast[ETH_ALEN], ret = 0;
int num_frames, valid_frames;
LIST_HEAD(tmp_list);
+ struct device *dev;
u8 *tx_frame;
int i;
+ dev = ice_pf_to_dev(pf);
netdev_info(netdev, "loopback test\n");
test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
@@@ -715,12 -711,12 +715,12 @@@
ret = 10;
lbtest_free_frame:
- devm_kfree(&pf->pdev->dev, tx_frame);
+ devm_kfree(dev, tx_frame);
remove_mac_filters:
if (ice_remove_mac(&pf->hw, &tmp_list))
netdev_err(netdev, "Could not remove MAC filter for the test VSI");
free_mac_list:
- ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+ ice_free_fltr_list(dev, &tmp_list);
lbtest_mac_dis:
/* Disable MAC loopback after the test is completed. */
if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
@@@ -777,9 -773,6 +777,9 @@@ ice_self_test(struct net_device *netdev
struct ice_netdev_priv *np = netdev_priv(netdev);
bool if_running = netif_running(netdev);
struct ice_pf *pf = np->vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
netdev_info(netdev, "offline testing starting\n");
@@@ -787,7 -780,7 +787,7 @@@
set_bit(__ICE_TESTING, pf->state);
if (ice_active_vfs(pf)) {
- dev_warn(&pf->pdev->dev,
+ dev_warn(dev,
"Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
data[ICE_ETH_TEST_REG] = 1;
data[ICE_ETH_TEST_EEPROM] = 1;
@@@ -822,7 -815,8 +822,7 @@@
int status = ice_open(netdev);
if (status) {
- dev_err(&pf->pdev->dev,
- "Could not open device %s, err %d",
+ dev_err(dev, "Could not open device %s, err %d",
pf->int_name, status);
}
}
@@@ -967,7 -961,7 +967,7 @@@ static int ice_set_fec_cfg(struct net_d
}
/* Get last SW configuration */
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
@@@ -1012,7 -1006,7 +1012,7 @@@
}
done:
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
return err;
}
@@@ -1088,7 -1082,7 +1088,7 @@@ ice_get_fecparam(struct net_device *net
break;
}
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
@@@ -1115,7 -1109,7 +1115,7 @@@
fecparam->fec |= ETHTOOL_FEC_OFF;
done:
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
return err;
}
@@@ -1160,14 -1154,12 +1160,14 @@@ static int ice_set_priv_flags(struct ne
DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct device *dev;
int ret = 0;
u32 i;
if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
return -EINVAL;
+ dev = ice_pf_to_dev(pf);
set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
@@@ -1196,7 -1188,7 +1196,7 @@@
* events to respond to.
*/
if (status)
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"Failed to unreg for LLDP events\n");
/* The AQ call to stop the FW LLDP agent will generate
@@@ -1204,14 -1196,20 +1204,14 @@@
*/
status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
if (status)
- dev_warn(&pf->pdev->dev,
- "Fail to stop LLDP agent\n");
+ dev_warn(dev, "Fail to stop LLDP agent\n");
/* Use case for having the FW LLDP agent stopped
* will likely not need DCB, so failure to init is
* not a concern of ethtool
*/
status = ice_init_pf_dcb(pf, true);
if (status)
- dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
-
- /* Forward LLDP packets to default VSI so that they
- * are passed up the stack
- */
- ice_cfg_sw_lldp(vsi, false, true);
+ dev_warn(dev, "Fail to init DCB\n");
} else {
enum ice_status status;
bool dcbx_agent_status;
@@@ -1221,7 -1219,8 +1221,7 @@@
*/
status = ice_aq_start_lldp(&pf->hw, true, NULL);
if (status)
- dev_warn(&pf->pdev->dev,
- "Fail to start LLDP Agent\n");
+ dev_warn(dev, "Fail to start LLDP Agent\n");
/* AQ command to start FW DCBX agent will fail if
* the agent is already started
@@@ -1230,9 -1229,10 +1230,9 @@@
&dcbx_agent_status,
NULL);
if (status)
- dev_dbg(&pf->pdev->dev,
- "Failed to start FW DCBX\n");
+ dev_dbg(dev, "Failed to start FW DCBX\n");
- dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n",
+ dev_info(dev, "FW DCBX agent is %s\n",
dcbx_agent_status ? "ACTIVE" : "DISABLED");
/* Failure to configure MIB change or init DCB is not
@@@ -1242,7 -1242,7 +1242,7 @@@
*/
status = ice_init_pf_dcb(pf, true);
if (status)
- dev_dbg(&pf->pdev->dev, "Fail to init DCB\n");
+ dev_dbg(dev, "Fail to init DCB\n");
/* Remove rule to direct LLDP packets to default VSI.
* The FW LLDP engine will now be consuming them.
@@@ -1252,15 -1252,10 +1252,15 @@@
/* Register for MIB change events */
status = ice_cfg_lldp_mib_change(&pf->hw, true);
if (status)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"Fail to enable MIB change events\n");
}
}
+ if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+ /* down and up VSI so that changes of Rx cfg are reflected. */
+ ice_down(vsi);
+ ice_up(vsi);
+ }
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
return ret;
}
@@@ -2145,7 -2140,7 +2145,7 @@@ ice_get_link_ksettings(struct net_devic
/* flow control is symmetric and always supported */
ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
@@@ -2203,7 -2198,7 +2203,7 @@@
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
done:
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
return err;
}
@@@ -2432,7 -2427,8 +2432,7 @@@ ice_set_link_ksettings(struct net_devic
usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
}
- abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities),
- GFP_KERNEL);
+ abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
if (!abilities)
return -ENOMEM;
@@@ -2524,7 -2520,7 +2524,7 @@@
}
done:
- devm_kfree(&pf->pdev->dev, abilities);
+ kfree(abilities);
clear_bit(__ICE_CFG_BUSY, pf->state);
return err;
@@@ -2581,7 -2577,6 +2581,7 @@@ ice_set_ringparam(struct net_device *ne
{
struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_ring *xdp_rings = NULL;
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
int i, timeout = 50, err = 0;
@@@ -2616,13 -2611,6 +2616,13 @@@
return 0;
}
+ /* If there is a AF_XDP UMEM attached to any of Rx rings,
+ * disallow changing the number of descriptors -- regardless
+ * if the netdev is running or not.
+ */
+ if (ice_xsk_any_rx_ring_ena(vsi))
+ return -EBUSY;
+
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
@@@ -2636,11 -2624,6 +2636,11 @@@
vsi->tx_rings[i]->count = new_tx_cnt;
for (i = 0; i < vsi->alloc_rxq; i++)
vsi->rx_rings[i]->count = new_rx_cnt;
+ if (ice_is_xdp_ena_vsi(vsi))
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->count = new_tx_cnt;
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
goto done;
}
@@@ -2652,13 -2635,14 +2652,13 @@@
netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
vsi->tx_rings[0]->count, new_tx_cnt);
- tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
- sizeof(*tx_rings), GFP_KERNEL);
+ tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
if (!tx_rings) {
err = -ENOMEM;
goto done;
}
- for (i = 0; i < vsi->alloc_txq; i++) {
+ ice_for_each_txq(vsi, i) {
/* clone ring and setup updated count */
tx_rings[i] = *vsi->tx_rings[i];
tx_rings[i].count = new_tx_cnt;
@@@ -2666,42 -2650,15 +2666,42 @@@
tx_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&tx_rings[i]);
if (err) {
- while (i) {
- i--;
+ while (i--)
ice_clean_tx_ring(&tx_rings[i]);
- }
- devm_kfree(&pf->pdev->dev, tx_rings);
+ kfree(tx_rings);
goto done;
}
}
+ if (!ice_is_xdp_ena_vsi(vsi))
+ goto process_rx;
+
+ /* alloc updated XDP resources */
+ netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+ vsi->xdp_rings[0]->count, new_tx_cnt);
+
+ xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
+ if (!xdp_rings) {
+ err = -ENOMEM;
+ goto free_tx;
+ }
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ /* clone ring and setup updated count */
+ xdp_rings[i] = *vsi->xdp_rings[i];
+ xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].desc = NULL;
+ xdp_rings[i].tx_buf = NULL;
+ err = ice_setup_tx_ring(&xdp_rings[i]);
+ if (err) {
+ while (i--)
+ ice_clean_tx_ring(&xdp_rings[i]);
+ kfree(xdp_rings);
+ goto free_tx;
+ }
+ ice_set_ring_xdp(&xdp_rings[i]);
+ }
+
process_rx:
if (new_rx_cnt == vsi->rx_rings[0]->count)
goto process_link;
@@@ -2710,13 -2667,14 +2710,13 @@@
netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
vsi->rx_rings[0]->count, new_rx_cnt);
- rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
- sizeof(*rx_rings), GFP_KERNEL);
+ rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
if (!rx_rings) {
err = -ENOMEM;
goto done;
}
- for (i = 0; i < vsi->alloc_rxq; i++) {
+ ice_for_each_rxq(vsi, i) {
/* clone ring and setup updated count */
rx_rings[i] = *vsi->rx_rings[i];
rx_rings[i].count = new_rx_cnt;
@@@ -2740,7 -2698,7 +2740,7 @@@ rx_unwind
i--;
ice_free_rx_ring(&rx_rings[i]);
}
- devm_kfree(&pf->pdev->dev, rx_rings);
+ kfree(rx_rings);
err = -ENOMEM;
goto free_tx;
}
@@@ -2754,15 -2712,15 +2754,15 @@@ process_link
ice_down(vsi);
if (tx_rings) {
- for (i = 0; i < vsi->alloc_txq; i++) {
+ ice_for_each_txq(vsi, i) {
ice_free_tx_ring(vsi->tx_rings[i]);
*vsi->tx_rings[i] = tx_rings[i];
}
- devm_kfree(&pf->pdev->dev, tx_rings);
+ kfree(tx_rings);
}
if (rx_rings) {
- for (i = 0; i < vsi->alloc_rxq; i++) {
+ ice_for_each_rxq(vsi, i) {
ice_free_rx_ring(vsi->rx_rings[i]);
/* copy the real tail offset */
rx_rings[i].tail = vsi->rx_rings[i]->tail;
@@@ -2776,19 -2734,9 +2776,19 @@@
rx_rings[i].next_to_alloc = 0;
*vsi->rx_rings[i] = rx_rings[i];
}
- devm_kfree(&pf->pdev->dev, rx_rings);
+ kfree(rx_rings);
+ }
+
+ if (xdp_rings) {
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ *vsi->xdp_rings[i] = xdp_rings[i];
+ }
+ kfree(xdp_rings);
}
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
ice_up(vsi);
}
goto done;
@@@ -2796,9 -2744,9 +2796,9 @@@
free_tx:
/* error cleanup if the Rx allocations failed after getting Tx */
if (tx_rings) {
- for (i = 0; i < vsi->alloc_txq; i++)
+ ice_for_each_txq(vsi, i)
ice_free_tx_ring(&tx_rings[i]);
- devm_kfree(&pf->pdev->dev, tx_rings);
+ kfree(tx_rings);
}
done:
@@@ -2846,6 -2794,7 +2846,6 @@@ ice_get_pauseparam(struct net_device *n
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_port_info *pi = np->vsi->port_info;
struct ice_aqc_get_phy_caps_data *pcaps;
- struct ice_vsi *vsi = np->vsi;
struct ice_dcbx_cfg *dcbx_cfg;
enum ice_status status;
@@@ -2855,7 -2804,8 +2855,7 @@@
dcbx_cfg = &pi->local_dcbx_cfg;
- pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps),
- GFP_KERNEL);
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
return;
@@@ -2878,7 -2828,7 +2878,7 @@@
pause->rx_pause = 1;
out:
- devm_kfree(&vsi->back->pdev->dev, pcaps);
+ kfree(pcaps);
}
/**
@@@ -3059,7 -3009,7 +3059,7 @@@ ice_get_rxfh(struct net_device *netdev
return -EIO;
}
- lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
if (!lut)
return -ENOMEM;
@@@ -3072,7 -3022,7 +3072,7 @@@
indir[i] = (u32)(lut[i]);
out:
- devm_kfree(&pf->pdev->dev, lut);
+ kfree(lut);
return ret;
}
@@@ -3093,10 -3043,8 +3093,10 @@@ ice_set_rxfh(struct net_device *netdev
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct device *dev;
u8 *seed = NULL;
+ dev = ice_pf_to_dev(pf);
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
@@@ -3109,7 -3057,8 +3109,7 @@@
if (key) {
if (!vsi->rss_hkey_user) {
vsi->rss_hkey_user =
- devm_kzalloc(&pf->pdev->dev,
- ICE_VSIQF_HKEY_ARRAY_SIZE,
+ devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
GFP_KERNEL);
if (!vsi->rss_hkey_user)
return -ENOMEM;
@@@ -3119,7 -3068,8 +3119,7 @@@
}
if (!vsi->rss_lut_user) {
- vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev,
- vsi->rss_table_size,
+ vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
GFP_KERNEL);
if (!vsi->rss_lut_user)
return -ENOMEM;
@@@ -3142,188 -3092,6 +3142,188 @@@
return 0;
}
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+ return min_t(int, num_online_cpus(),
+ pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+ return min_t(int, num_online_cpus(),
+ pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count ones that have both Rx and Tx ring
+ * attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+ u32 combined = 0;
+ int q_idx;
+
+ ice_for_each_q_vector(vsi, q_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring && q_vector->tx.ring)
+ combined++;
+ }
+
+ return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ /* check to see if VSI is active */
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ /* report maximum channels */
+ ch->max_rx = ice_get_max_rxq(pf);
+ ch->max_tx = ice_get_max_txq(pf);
+ ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+ /* report current channels */
+ ch->combined_count = ice_get_combined_cnt(vsi);
+ ch->rx_count = vsi->num_rxq - ch->combined_count;
+ ch->tx_count = vsi->num_txq - ch->combined_count;
+}
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */
+static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
+{
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ struct device *dev;
+ struct ice_hw *hw;
+ int err = 0;
+ u8 *lut;
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+
+ if (!req_rss_size)
+ return -EINVAL;
+
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* set RSS LUT parameters */
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ vsi->rss_size = 1;
+ } else {
+ struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+ vsi->rss_size = min_t(int, req_rss_size,
+ BIT(caps->rss_table_entry_width));
+ }
+
+ /* create/set RSS LUT */
+ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+ status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
+ vsi->rss_table_size);
+ if (status) {
+ dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
+ status, hw->adminq.rq_last_status);
+ err = -EIO;
+ }
+
+ kfree(lut);
+ return err;
+}
+
+/**
+ * ice_set_channels - set the number channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int new_rx = 0, new_tx = 0;
+ u32 curr_combined;
+
+ /* do not support changing channels in Safe Mode */
+ if (ice_is_safe_mode(pf)) {
+ netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ /* do not support changing other_count */
+ if (ch->other_count)
+ return -EINVAL;
+
+ curr_combined = ice_get_combined_cnt(vsi);
+
+ /* these checks are for cases where user didn't specify a particular
+ * value on cmd line but we get non-zero value anyway via
+ * get_channels(); look at ethtool.c in ethtool repository (the user
+ * space part), particularly, do_schannels() routine
+ */
+ if (ch->rx_count == vsi->num_rxq - curr_combined)
+ ch->rx_count = 0;
+ if (ch->tx_count == vsi->num_txq - curr_combined)
+ ch->tx_count = 0;
+ if (ch->combined_count == curr_combined)
+ ch->combined_count = 0;
+
+ if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) {
+ netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
+ return -EINVAL;
+ }
+
+ new_rx = ch->combined_count + ch->rx_count;
+ new_tx = ch->combined_count + ch->tx_count;
+
+ if (new_rx > ice_get_max_rxq(pf)) {
+ netdev_err(dev, "Maximum allowed Rx channels is %d\n",
+ ice_get_max_rxq(pf));
+ return -EINVAL;
+ }
+ if (new_tx > ice_get_max_txq(pf)) {
+ netdev_err(dev, "Maximum allowed Tx channels is %d\n",
+ ice_get_max_txq(pf));
+ return -EINVAL;
+ }
+
+ ice_vsi_recfg_qs(vsi, new_rx, new_tx);
+
+ if (new_rx && !netif_is_rxfh_configured(dev))
+ return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+
+ return 0;
+}
+
enum ice_container_type {
ICE_RX_CONTAINER,
ICE_TX_CONTAINER,
@@@ -3363,7 -3131,7 +3363,7 @@@ ice_get_rc_coalesce(struct ethtool_coal
ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
break;
default:
- dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type);
+ dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type);
return -EINVAL;
}
@@@ -3503,8 -3271,7 +3503,8 @@@ ice_set_rc_coalesce(enum ice_container_
break;
default:
- dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type);
+ dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+ c_type);
return -EINVAL;
}
@@@ -3601,17 -3368,10 +3601,17 @@@ __ice_set_coalesce(struct net_device *n
struct ice_vsi *vsi = np->vsi;
if (q_num < 0) {
- int i;
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ /* In some cases if DCB is configured the num_[rx|tx]q
+ * can be less than vsi->num_q_vectors. This check
+ * accounts for that so we don't report a false failure
+ */
+ if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
+ goto set_complete;
- ice_for_each_q_vector(vsi, i) {
- if (ice_set_q_coalesce(vsi, ec, i))
+ if (ice_set_q_coalesce(vsi, ec, v_idx))
return -EINVAL;
}
goto set_complete;
@@@ -3638,151 -3398,6 +3638,151 @@@ ice_set_per_q_coalesce(struct net_devic
return __ice_set_coalesce(netdev, ec, q_num);
}
+#define ICE_I2C_EEPROM_DEV_ADDR 0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2
+#define ICE_MODULE_TYPE_SFP 0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D
+#define ICE_MODULE_TYPE_QSFP28 0x11
+#define ICE_MODULE_SFF_ADDR_MODE 0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB 0x40
+#define ICE_MODULE_REVISION_ADDR 0x01
+#define ICE_MODULE_SFF_8472_COMP 0x5E
+#define ICE_MODULE_SFF_8472_SWAP 0x5C
+#define ICE_MODULE_QSFP_MAX_LEN 640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ u8 sff8472_comp = 0;
+ u8 sff8472_swap = 0;
+ u8 sff8636_rev = 0;
+ u8 value = 0;
+
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+ 0, &value, 1, 0, NULL);
+ if (status)
+ return -EIO;
+
+ switch (value) {
+ case ICE_MODULE_TYPE_SFP:
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+ &sff8472_comp, 1, 0, NULL);
+ if (status)
+ return -EIO;
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+ &sff8472_swap, 1, 0, NULL);
+ if (status)
+ return -EIO;
+
+ if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else if (sff8472_comp &&
+ (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ }
+ break;
+ case ICE_MODULE_TYPE_QSFP_PLUS:
+ case ICE_MODULE_TYPE_QSFP28:
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_REVISION_ADDR, 0x00, 0,
+ &sff8636_rev, 1, 0, NULL);
+ if (status)
+ return -EIO;
+ /* Check revision compliance */
+ if (sff8636_rev > 0x02) {
+ /* Module is SFF-8636 compliant */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+ }
+ break;
+ default:
+ netdev_warn(netdev,
+ "SFF Module Type not recognized.\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ bool is_sfp = false;
+ u16 offset = 0;
+ u8 value = 0;
+ u8 page = 0;
+ int i;
+
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
+ &value, 1, 0, NULL);
+ if (status)
+ return -EIO;
+
+ if (!ee || !ee->len || !data)
+ return -EINVAL;
+
+ if (value == ICE_MODULE_TYPE_SFP)
+ is_sfp = true;
+
+ for (i = 0; i < ee->len; i++) {
+ offset = i + ee->offset;
+
+ /* Check if we need to access the other memory page */
+ if (is_sfp) {
+ if (offset >= ETH_MODULE_SFF_8079_LEN) {
+ offset -= ETH_MODULE_SFF_8079_LEN;
+ addr = ICE_I2C_EEPROM_DEV_ADDR2;
+ }
+ } else {
+ while (offset >= ETH_MODULE_SFF_8436_LEN) {
+ /* Compute memory page number and offset. */
+ offset -= ETH_MODULE_SFF_8436_LEN / 2;
+ page++;
+ }
+ }
+
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
+ &value, 1, 0, NULL);
+ if (status)
+ value = 0;
+ data[i] = value;
+ }
+ return 0;
+}
+
static const struct ethtool_ops ice_ethtool_ops = {
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
@@@ -3813,15 -3428,11 +3813,15 @@@
.get_rxfh_indir_size = ice_get_rxfh_indir_size,
.get_rxfh = ice_get_rxfh,
.set_rxfh = ice_set_rxfh,
+ .get_channels = ice_get_channels,
+ .set_channels = ice_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_per_queue_coalesce = ice_get_per_q_coalesce,
.set_per_queue_coalesce = ice_set_per_q_coalesce,
.get_fecparam = ice_get_fecparam,
.set_fecparam = ice_set_fecparam,
+ .get_module_info = ice_get_module_info,
+ .get_module_eeprom = ice_get_module_eeprom,
};
static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
@@@ -3840,7 -3451,6 +3840,7 @@@
.get_ringparam = ice_get_ringparam,
.set_ringparam = ice_set_ringparam,
.nway_reset = ice_nway_reset,
+ .get_channels = ice_get_channels,
};
/**
diff --combined drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index ad34f22d44ef,b5867b42ca2f..110a9dfe7192
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@@ -211,7 -211,7 +211,7 @@@ enum ice_flex_rx_mdid
/* Rx/Tx Flag64 packet flag bits */
enum ice_flg64_bits {
ICE_FLG_PKT_DSI = 0,
- ICE_FLG_EVLAN_x8100 = 15,
+ ICE_FLG_EVLAN_x8100 = 14,
ICE_FLG_EVLAN_x9100,
ICE_FLG_VLAN_x8100,
ICE_FLG_TNL_MAC = 22,
@@@ -302,7 -302,7 +302,7 @@@ struct ice_ctx_ele
#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \
.offset = offsetof(struct _struct, _ele), \
- .size_of = FIELD_SIZEOF(struct _struct, _ele), \
+ .size_of = sizeof_member(struct _struct, _ele), \
.width = _width, \
.lsb = _lsb, \
}
diff --combined drivers/net/ethernet/marvell/mv643xx_eth.c
index d5b644131cff,6d3b7032f842..74d0381989ad
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@@ -1432,11 -1432,11 +1432,11 @@@ struct mv643xx_eth_stats
};
#define SSTAT(m) \
- { #m, FIELD_SIZEOF(struct net_device_stats, m), \
+ { #m, sizeof_member(struct net_device_stats, m), \
offsetof(struct net_device, stats.m), -1 }
#define MIBSTAT(m) \
- { #m, FIELD_SIZEOF(struct mib_counters, m), \
+ { #m, sizeof_member(struct mib_counters, m), \
-1, offsetof(struct mv643xx_eth_private, mib_counters.m) }
static const struct mv643xx_eth_stats mv643xx_eth_stats[] = {
@@@ -2959,16 -2959,15 +2959,16 @@@ static void set_params(struct mv643xx_e
static int get_phy_mode(struct mv643xx_eth_private *mp)
{
struct device *dev = mp->dev->dev.parent;
- int iface = -1;
+ phy_interface_t iface;
+ int err;
if (dev->of_node)
- iface = of_get_phy_mode(dev->of_node);
+ err = of_get_phy_mode(dev->of_node, &iface);
/* Historical default if unspecified. We could also read/write
* the interface state in the PSC1
*/
- if (iface < 0)
+ if (!dev->of_node || err)
iface = PHY_INTERFACE_MODE_GMII;
return iface;
}
diff --combined drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index a1202e53710c,f421699d745b..762688149cf1
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@@ -611,7 -611,7 +611,7 @@@ static u32 ptys_get_active_port(struct
}
#define MLX4_LINK_MODES_SZ \
- (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8)
+ (sizeof_member(struct mlx4_ptys_reg, eth_proto_cap) * 8)
enum ethtool_report {
SUPPORTED = 0,
@@@ -1745,7 -1745,6 +1745,7 @@@ static int mlx4_en_get_rxnfc(struct net
err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
break;
case ETHTOOL_GRXCLSRLALL:
+ cmd->data = MAX_NUM_OF_FS_RULES;
while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) {
err = mlx4_en_get_flow(dev, cmd, i);
if (!err)
@@@ -1812,7 -1811,6 +1812,7 @@@ static int mlx4_en_set_channels(struct
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_port_profile new_prof;
struct mlx4_en_priv *tmp;
+ int total_tx_count;
int port_up = 0;
int xdp_count;
int err = 0;
@@@ -1827,12 -1825,13 +1827,12 @@@
mutex_lock(&mdev->state_lock);
xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
- if (channel->tx_count * priv->prof->num_up + xdp_count >
- priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) {
+ total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count;
+ if (total_tx_count > MAX_TX_RINGS) {
err = -EINVAL;
en_err(priv,
"Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
- channel->tx_count * priv->prof->num_up + xdp_count,
- MAX_TX_RINGS);
+ total_tx_count, MAX_TX_RINGS);
goto out;
}
diff --combined drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d60577484567,82e6972f68b7..78aca2681b71
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@@ -209,7 -209,7 +209,7 @@@ enum fs_i_lock_class
};
static const struct rhashtable_params rhash_fte = {
- .key_len = FIELD_SIZEOF(struct fs_fte, val),
+ .key_len = sizeof_member(struct fs_fte, val),
.key_offset = offsetof(struct fs_fte, val),
.head_offset = offsetof(struct fs_fte, hash),
.automatic_shrinking = true,
@@@ -217,7 -217,7 +217,7 @@@
};
static const struct rhashtable_params rhash_fg = {
- .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
+ .key_len = sizeof_member(struct mlx5_flow_group, mask),
.key_offset = offsetof(struct mlx5_flow_group, mask),
.head_offset = offsetof(struct mlx5_flow_group, hash),
.automatic_shrinking = true,
@@@ -531,16 -531,9 +531,16 @@@ static void del_hw_fte(struct fs_node *
}
}
+static void del_sw_fte_rcu(struct rcu_head *head)
+{
+ struct fs_fte *fte = container_of(head, struct fs_fte, rcu);
+ struct mlx5_flow_steering *steering = get_steering(&fte->node);
+
+ kmem_cache_free(steering->ftes_cache, fte);
+}
+
static void del_sw_fte(struct fs_node *node)
{
- struct mlx5_flow_steering *steering = get_steering(node);
struct mlx5_flow_group *fg;
struct fs_fte *fte;
int err;
@@@ -553,8 -546,7 +553,8 @@@
rhash_fte);
WARN_ON(err);
ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
- kmem_cache_free(steering->ftes_cache, fte);
+
+ call_rcu(&fte->rcu, del_sw_fte_rcu);
}
static void del_hw_flow_group(struct fs_node *node)
@@@ -587,7 -579,7 +587,7 @@@ static void del_sw_flow_group(struct fs
rhashtable_destroy(&fg->ftes_hash);
ida_destroy(&fg->fte_allocator);
- if (ft->autogroup.active)
+ if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
ft->autogroup.num_groups--;
err = rhltable_remove(&ft->fgs_hash,
&fg->hash,
@@@ -1134,8 -1126,6 +1134,8 @@@ mlx5_create_auto_grouped_flow_table(str
ft->autogroup.active = true;
ft->autogroup.required_groups = max_num_groups;
+ /* We save place for flow groups in addition to max types */
+ ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
return ft;
}
@@@ -1338,7 -1328,8 +1338,7 @@@ static struct mlx5_flow_group *alloc_au
return ERR_PTR(-ENOENT);
if (ft->autogroup.num_groups < ft->autogroup.required_groups)
- /* We save place for flow groups in addition to max types */
- group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
+ group_size = ft->autogroup.group_size;
/* ft->max_fte == ft->autogroup.max_types */
if (group_size == 0)
@@@ -1365,8 -1356,7 +1365,8 @@@
if (IS_ERR(fg))
goto out;
- ft->autogroup.num_groups++;
+ if (group_size == ft->autogroup.group_size)
+ ft->autogroup.num_groups++;
out:
return fg;
@@@ -1633,47 -1623,22 +1633,47 @@@ static u64 matched_fgs_get_version(stru
}
static struct fs_fte *
-lookup_fte_locked(struct mlx5_flow_group *g,
- const u32 *match_value,
- bool take_write)
+lookup_fte_for_write_locked(struct mlx5_flow_group *g, const u32 *match_value)
{
struct fs_fte *fte_tmp;
- if (take_write)
- nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
- else
- nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
- fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
- rhash_fte);
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
+ fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, rhash_fte);
if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
fte_tmp = NULL;
goto out;
}
+
+ if (!fte_tmp->node.active) {
+ tree_put_node(&fte_tmp->node, false);
+ fte_tmp = NULL;
+ goto out;
+ }
+ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
+out:
+ up_write_ref_node(&g->node, false);
+ return fte_tmp;
+}
+
+static struct fs_fte *
+lookup_fte_for_read_locked(struct mlx5_flow_group *g, const u32 *match_value)
+{
+ struct fs_fte *fte_tmp;
+
+ if (!tree_get_node(&g->node))
+ return NULL;
+
+ rcu_read_lock();
+ fte_tmp = rhashtable_lookup(&g->ftes_hash, match_value, rhash_fte);
+ if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+ rcu_read_unlock();
+ fte_tmp = NULL;
+ goto out;
+ }
+ rcu_read_unlock();
+
if (!fte_tmp->node.active) {
tree_put_node(&fte_tmp->node, false);
fte_tmp = NULL;
@@@ -1681,21 -1646,14 +1681,21 @@@
}
nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
out:
- if (take_write)
- up_write_ref_node(&g->node, false);
- else
- up_read_ref_node(&g->node);
+ tree_put_node(&g->node, false);
return fte_tmp;
}
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g, const u32 *match_value, bool write)
+{
+ if (write)
+ return lookup_fte_for_write_locked(g, match_value);
+ else
+ return lookup_fte_for_read_locked(g, match_value);
+}
+
static struct mlx5_flow_handle *
try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct list_head *match_head,
@@@ -1856,13 -1814,6 +1856,13 @@@ search_again_locked
return rule;
}
+ fte = alloc_fte(ft, spec, flow_act);
+ if (IS_ERR(fte)) {
+ up_write_ref_node(&ft->node, false);
+ err = PTR_ERR(fte);
+ goto err_alloc_fte;
+ }
+
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
up_write_ref_node(&ft->node, false);
@@@ -1870,9 -1821,17 +1870,9 @@@
if (err)
goto err_release_fg;
- fte = alloc_fte(ft, spec, flow_act);
- if (IS_ERR(fte)) {
- err = PTR_ERR(fte);
- goto err_release_fg;
- }
-
err = insert_fte(g, fte);
- if (err) {
- kmem_cache_free(steering->ftes_cache, fte);
+ if (err)
goto err_release_fg;
- }
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
@@@ -1884,8 -1843,6 +1884,8 @@@
err_release_fg:
up_write_ref_node(&g->node, false);
+ kmem_cache_free(steering->ftes_cache, fte);
+err_alloc_fte:
tree_put_node(&g->node, false);
return ERR_PTR(err);
}
@@@ -2402,17 -2359,9 +2402,17 @@@ static void set_prio_attrs_in_prio(stru
int acc_level_ns = acc_level;
prio->start_level = acc_level;
- fs_for_each_ns(ns, prio)
+ fs_for_each_ns(ns, prio) {
/* This updates start_level and num_levels of ns's priority descendants */
acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
+
+ /* If this a prio with chains, and we can jump from one chain
+ * (namepsace) to another, so we accumulate the levels
+ */
+ if (prio->node.type == FS_TYPE_PRIO_CHAINS)
+ acc_level = acc_level_ns;
+ }
+
if (!prio->num_levels)
prio->num_levels = acc_level_ns - prio->start_level;
WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
@@@ -2601,109 -2550,58 +2601,109 @@@ out_err
steering->rdma_rx_root_ns = NULL;
return err;
}
-static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
+
+/* FT and tc chains are stored in the same array so we can re-use the
+ * mlx5_get_fdb_sub_ns() and tc api for FT chains.
+ * When creating a new ns for each chain store it in the first available slot.
+ * Assume tc chains are created and stored first and only then the FT chain.
+ */
+static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
+ struct mlx5_flow_namespace *ns)
+{
+ int chain = 0;
+
+ while (steering->fdb_sub_ns[chain])
+ ++chain;
+
+ steering->fdb_sub_ns[chain] = ns;
+}
+
+static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
+ struct fs_prio *maj_prio)
{
struct mlx5_flow_namespace *ns;
- struct fs_prio *maj_prio;
struct fs_prio *min_prio;
+ int prio;
+
+ ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+
+ for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) {
+ min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO);
+ if (IS_ERR(min_prio))
+ return PTR_ERR(min_prio);
+ }
+
+ store_fdb_sub_ns_prio_chain(steering, ns);
+
+ return 0;
+}
+
+static int create_fdb_chains(struct mlx5_flow_steering *steering,
+ int fs_prio,
+ int chains)
+{
+ struct fs_prio *maj_prio;
int levels;
int chain;
- int prio;
int err;
- steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
- if (!steering->fdb_root_ns)
- return -ENOMEM;
+ levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains;
+ maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
+ fs_prio,
+ levels);
+ if (IS_ERR(maj_prio))
+ return PTR_ERR(maj_prio);
+
+ for (chain = 0; chain < chains; chain++) {
+ err = create_fdb_sub_ns_prio_chain(steering, maj_prio);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
+{
+ int err;
- steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) *
- (FDB_MAX_CHAIN + 1), GFP_KERNEL);
+ steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS,
+ sizeof(*steering->fdb_sub_ns),
+ GFP_KERNEL);
if (!steering->fdb_sub_ns)
return -ENOMEM;
+ err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1);
+ if (err)
+ return err;
+
+ err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *maj_prio;
+ int err;
+
+ steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
+ if (!steering->fdb_root_ns)
+ return -ENOMEM;
+
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
1);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
-
- levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
- maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
- FDB_FAST_PATH,
- levels);
- if (IS_ERR(maj_prio)) {
- err = PTR_ERR(maj_prio);
+ err = create_fdb_fast_path(steering);
+ if (err)
goto out_err;
- }
-
- for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
- ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
- if (IS_ERR(ns)) {
- err = PTR_ERR(ns);
- goto out_err;
- }
-
- for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
- min_prio = fs_create_prio(ns, prio, 2);
- if (IS_ERR(min_prio)) {
- err = PTR_ERR(min_prio);
- goto out_err;
- }
- }
-
- steering->fdb_sub_ns[chain] = ns;
- }
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
if (IS_ERR(maj_prio)) {
diff --combined drivers/net/ethernet/netronome/nfp/bpf/jit.c
index c80bb83c8ac9,276af07a2841..a2c02be6e34c
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@@ -2652,17 -2652,17 +2652,17 @@@ static int mem_ldx_skb(struct nfp_prog
switch (meta->insn.off) {
case offsetof(struct __sk_buff, len):
- if (size != FIELD_SIZEOF(struct __sk_buff, len))
+ if (size != sizeof_member(struct __sk_buff, len))
return -EOPNOTSUPP;
wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
break;
case offsetof(struct __sk_buff, data):
- if (size != FIELD_SIZEOF(struct __sk_buff, data))
+ if (size != sizeof_member(struct __sk_buff, data))
return -EOPNOTSUPP;
wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
break;
case offsetof(struct __sk_buff, data_end):
- if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
+ if (size != sizeof_member(struct __sk_buff, data_end))
return -EOPNOTSUPP;
emit_alu(nfp_prog, dst,
plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
@@@ -2683,12 -2683,12 +2683,12 @@@ static int mem_ldx_xdp(struct nfp_prog
switch (meta->insn.off) {
case offsetof(struct xdp_md, data):
- if (size != FIELD_SIZEOF(struct xdp_md, data))
+ if (size != sizeof_member(struct xdp_md, data))
return -EOPNOTSUPP;
wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
break;
case offsetof(struct xdp_md, data_end):
- if (size != FIELD_SIZEOF(struct xdp_md, data_end))
+ if (size != sizeof_member(struct xdp_md, data_end))
return -EOPNOTSUPP;
emit_alu(nfp_prog, dst,
plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
@@@ -3952,7 -3952,7 +3952,7 @@@ static void nfp_bpf_opt_neg_add_sub(str
static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
{
struct nfp_insn_meta *meta1, *meta2;
- const s32 exp_mask[] = {
+ static const s32 exp_mask[] = {
[BPF_B] = 0x000000ffU,
[BPF_H] = 0x0000ffffU,
[BPF_W] = 0xffffffffU,
diff --combined drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 95a0d3910e31,496a08fd6077..1d71fe20c7bd
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@@ -46,7 -46,9 +46,7 @@@ nfp_map_ptr_record(struct nfp_app_bpf *
/* Grab a single ref to the map for our record. The prog destroy ndo
* happens after free_used_maps().
*/
- map = bpf_map_inc(map, false);
- if (IS_ERR(map))
- return PTR_ERR(map);
+ bpf_map_inc(map);
record = kmalloc(sizeof(*record), GFP_KERNEL);
if (!record) {
@@@ -374,7 -376,7 +374,7 @@@ nfp_bpf_map_alloc(struct nfp_app_bpf *b
}
use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) *
- FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]);
+ sizeof_member(struct nfp_bpf_map, use_map[0]);
nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER);
if (!nfp_map)
@@@ -458,8 -460,8 +458,8 @@@ int nfp_bpf_event_output(struct nfp_app
return -EINVAL;
rcu_read_lock();
- record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id,
- nfp_bpf_maps_neutral_params);
+ record = rhashtable_lookup(&bpf->maps_neutral, &map_id,
+ nfp_bpf_maps_neutral_params);
if (!record || map_id_full > U32_MAX) {
rcu_read_unlock();
cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n",
diff --combined drivers/net/ethernet/ti/netcp_ethss.c
index 86a3f42a3dcc,452597f7f5ae..4b54c1a324a7
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@@ -783,28 -783,28 +783,28 @@@ struct netcp_ethtool_stat
#define GBE_STATSA_INFO(field) \
{ \
"GBE_A:"#field, GBE_STATSA_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_member(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
#define GBE_STATSB_INFO(field) \
{ \
"GBE_B:"#field, GBE_STATSB_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_member(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
#define GBE_STATSC_INFO(field) \
{ \
"GBE_C:"#field, GBE_STATSC_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_member(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
#define GBE_STATSD_INFO(field) \
{ \
"GBE_D:"#field, GBE_STATSD_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_member(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
@@@ -957,7 -957,7 +957,7 @@@ static const struct netcp_ethtool_stat
#define GBENU_STATS_HOST(field) \
{ \
"GBE_HOST:"#field, GBENU_STATS0_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
@@@ -967,56 -967,56 +967,56 @@@
#define GBENU_STATS_P1(field) \
{ \
"GBE_P1:"#field, GBENU_STATS1_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P2(field) \
{ \
"GBE_P2:"#field, GBENU_STATS2_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P3(field) \
{ \
"GBE_P3:"#field, GBENU_STATS3_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P4(field) \
{ \
"GBE_P4:"#field, GBENU_STATS4_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P5(field) \
{ \
"GBE_P5:"#field, GBENU_STATS5_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P6(field) \
{ \
"GBE_P6:"#field, GBENU_STATS6_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P7(field) \
{ \
"GBE_P7:"#field, GBENU_STATS7_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P8(field) \
{ \
"GBE_P8:"#field, GBENU_STATS8_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_member(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
@@@ -1607,21 -1607,21 +1607,21 @@@ static const struct netcp_ethtool_stat
#define XGBE_STATS0_INFO(field) \
{ \
"GBE_0:"#field, XGBE_STATS0_MODULE, \
- FIELD_SIZEOF(struct xgbe_hw_stats, field), \
+ sizeof_member(struct xgbe_hw_stats, field), \
offsetof(struct xgbe_hw_stats, field) \
}
#define XGBE_STATS1_INFO(field) \
{ \
"GBE_1:"#field, XGBE_STATS1_MODULE, \
- FIELD_SIZEOF(struct xgbe_hw_stats, field), \
+ sizeof_member(struct xgbe_hw_stats, field), \
offsetof(struct xgbe_hw_stats, field) \
}
#define XGBE_STATS2_INFO(field) \
{ \
"GBE_2:"#field, XGBE_STATS2_MODULE, \
- FIELD_SIZEOF(struct xgbe_hw_stats, field), \
+ sizeof_member(struct xgbe_hw_stats, field), \
offsetof(struct xgbe_hw_stats, field) \
}
@@@ -2291,7 -2291,6 +2291,7 @@@ static int gbe_slave_open(struct gbe_in
struct gbe_slave *slave = gbe_intf->slave;
phy_interface_t phy_mode;
bool has_phy = false;
+ int err;
void (*hndlr)(struct net_device *) = gbe_adjust_link;
@@@ -2321,11 -2320,11 +2321,11 @@@
slave->phy_port_t = PORT_MII;
} else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
has_phy = true;
- phy_mode = of_get_phy_mode(slave->node);
+ err = of_get_phy_mode(slave->node, &phy_mode);
/* if phy-mode is not present, default to
* PHY_INTERFACE_MODE_RGMII
*/
- if (phy_mode < 0)
+ if (err)
phy_mode = PHY_INTERFACE_MODE_RGMII;
if (!phy_interface_mode_is_rgmii(phy_mode)) {
diff --combined drivers/net/hyperv/netvsc_drv.c
index 868e22e286ca,6102264d9978..a3bd350672b0
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@@ -285,9 -285,9 +285,9 @@@ static inline u32 netvsc_get_hash
else if (flow.basic.n_proto == htons(ETH_P_IPV6))
hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
else
- hash = 0;
+ return 0;
- skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+ __skb_set_sw_hash(skb, hash, false);
}
return hash;
@@@ -571,7 -571,7 +571,7 @@@ static int netvsc_start_xmit(struct sk_
/* Use the skb control buffer for building up the packet */
BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_member(struct sk_buff, cb));
packet = (struct hv_netvsc_packet *)skb->cb;
packet->q_idx = skb_get_queue_mapping(skb);
@@@ -766,7 -766,6 +766,7 @@@ static struct sk_buff *netvsc_alloc_rec
const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
const struct ndis_tcp_ip_checksum_info *csum_info =
nvchan->rsc.csum_info;
+ const u32 *hash_info = nvchan->rsc.hash_info;
struct sk_buff *skb;
int i;
@@@ -796,16 -795,14 +796,16 @@@
skb->protocol == htons(ETH_P_IP))
netvsc_comp_ipcsum(skb);
- /* Do L4 checksum offload if enabled and present.
- */
+ /* Do L4 checksum offload if enabled and present. */
if (csum_info && (net->features & NETIF_F_RXCSUM)) {
if (csum_info->receive.tcp_checksum_succeeded ||
csum_info->receive.udp_checksum_succeeded)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
+ if (hash_info && (net->features & NETIF_F_RXHASH))
+ skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
+
if (vlan) {
u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) |
(vlan->cfi ? VLAN_CFI_MASK : 0);
@@@ -985,7 -982,7 +985,7 @@@ static int netvsc_attach(struct net_dev
if (netif_running(ndev)) {
ret = rndis_filter_open(nvdev);
if (ret)
- return ret;
+ goto err;
rdev = nvdev->extension;
if (!rdev->link_state)
@@@ -993,13 -990,6 +993,13 @@@
}
return 0;
+
+err:
+ netif_device_detach(ndev);
+
+ rndis_filter_device_remove(hdev, nvdev);
+
+ return ret;
}
static int netvsc_set_channels(struct net_device *net,
@@@ -1817,10 -1807,8 +1817,10 @@@ static int netvsc_set_features(struct n
ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
- if (ret)
+ if (ret) {
features ^= NETIF_F_LRO;
+ ndev->features = features;
+ }
syncvf:
if (!vf_netdev)
@@@ -2347,6 -2335,8 +2347,6 @@@ static int netvsc_probe(struct hv_devic
NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
- netdev_lockdep_set_classes(net);
-
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
diff --combined drivers/net/usb/usbnet.c
index 30e511c2c8d0,217ea0126f93..bad73cdbddec
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@@ -1573,13 -1573,6 +1573,13 @@@ static void usbnet_bh (struct timer_lis
}
}
+static void usbnet_bh_tasklet(unsigned long data)
+{
+ struct timer_list *t = (struct timer_list *)data;
+
+ usbnet_bh(t);
+}
+
/*-------------------------------------------------------------------------
*
@@@ -1707,7 -1700,7 +1707,7 @@@ usbnet_probe (struct usb_interface *ude
skb_queue_head_init (&dev->txq);
skb_queue_head_init (&dev->done);
skb_queue_head_init(&dev->rxq_pause);
- dev->bh.func = (void (*)(unsigned long))usbnet_bh;
+ dev->bh.func = usbnet_bh_tasklet;
dev->bh.data = (unsigned long)&dev->delay;
INIT_WORK (&dev->kevent, usbnet_deferred_kevent);
init_usb_anchor(&dev->deferred);
@@@ -2184,7 -2177,7 +2184,7 @@@ static int __init usbnet_init(void
{
/* Compiler should optimize this out. */
BUILD_BUG_ON(
- FIELD_SIZEOF(struct sk_buff, cb) < sizeof(struct skb_data));
+ sizeof_member(struct sk_buff, cb) < sizeof(struct skb_data));
eth_random_addr(node_id);
return 0;
diff --combined drivers/net/vxlan.c
index bf04bc2e68c2,27bfde225a73..8d4e17f0e6e4
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -793,7 -793,8 +793,7 @@@ static int vxlan_gro_complete(struct so
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
-static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
- const u8 *mac, __u16 state,
+static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
__be32 src_vni, __u16 ndm_flags)
{
struct vxlan_fdb *f;
@@@ -834,7 -835,7 +834,7 @@@ static int vxlan_fdb_create(struct vxla
return -ENOSPC;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
- f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+ f = vxlan_fdb_alloc(mac, state, src_vni, ndm_flags);
if (!f)
return -ENOMEM;
@@@ -2486,11 -2487,9 +2486,11 @@@ static void vxlan_xmit_one(struct sk_bu
vni = tunnel_id_to_key32(info->key.tun_id);
ifindex = 0;
dst_cache = &info->dst_cache;
- if (info->options_len &&
- info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ if (info->options_len < sizeof(*md))
+ goto drop;
md = ip_tunnel_info_opts(info);
+ }
ttl = info->key.ttl;
tos = info->key.tos;
label = info->key.label;
@@@ -3071,10 -3070,10 +3071,10 @@@ static void vxlan_raw_setup(struct net_
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
- [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VXLAN_GROUP] = { .len = sizeof_member(struct iphdr, daddr) },
[IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_LINK] = { .type = NLA_U32 },
- [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+ [IFLA_VXLAN_LOCAL] = { .len = sizeof_member(struct iphdr, saddr) },
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
@@@ -3175,29 -3174,9 +3175,29 @@@ static void vxlan_get_drvinfo(struct ne
strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
}
+static int vxlan_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+ dst->remote_ifindex);
+
+ if (!lowerdev) {
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ cmd->base.port = PORT_OTHER;
+ cmd->base.speed = SPEED_UNKNOWN;
+
+ return 0;
+ }
+
+ return __ethtool_get_link_ksettings(lowerdev, cmd);
+}
+
static const struct ethtool_ops vxlan_ethtool_ops = {
- .get_drvinfo = vxlan_get_drvinfo,
- .get_link = ethtool_op_get_link,
+ .get_drvinfo = vxlan_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = vxlan_get_link_ksettings,
};
static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
@@@ -3587,13 -3566,10 +3587,13 @@@ static int __vxlan_dev_create(struct ne
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct net_device *remote_dev = NULL;
struct vxlan_fdb *f = NULL;
bool unregister = false;
+ struct vxlan_rdst *dst;
int err;
+ dst = &vxlan->default_dst;
err = vxlan_dev_configure(net, dev, conf, false, extack);
if (err)
return err;
@@@ -3601,14 -3577,14 +3601,14 @@@
dev->ethtool_ops = &vxlan_ethtool_ops;
/* create an fdb entry for a valid default destination */
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+ if (!vxlan_addr_any(&dst->remote_ip)) {
err = vxlan_fdb_create(vxlan, all_zeros_mac,
- &vxlan->default_dst.remote_ip,
+ &dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
vxlan->cfg.dst_port,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_ifindex,
+ dst->remote_vni,
+ dst->remote_vni,
+ dst->remote_ifindex,
NTF_SELF, &f);
if (err)
return err;
@@@ -3619,41 -3595,26 +3619,41 @@@
goto errout;
unregister = true;
+ if (dst->remote_ifindex) {
+ remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
+ if (!remote_dev)
+ goto errout;
+
+ err = netdev_upper_dev_link(remote_dev, dev, extack);
+ if (err)
+ goto errout;
+ }
+
err = rtnl_configure_link(dev, NULL);
if (err)
- goto errout;
+ goto unlink;
if (f) {
- vxlan_fdb_insert(vxlan, all_zeros_mac,
- vxlan->default_dst.remote_vni, f);
+ vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
/* notify default fdb entry */
err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
RTM_NEWNEIGH, true, extack);
if (err) {
vxlan_fdb_destroy(vxlan, f, false, false);
+ if (remote_dev)
+ netdev_upper_dev_unlink(remote_dev, dev);
goto unregister;
}
}
list_add(&vxlan->next, &vn->vxlan_list);
+ if (remote_dev)
+ dst->remote_dev = remote_dev;
return 0;
-
+unlink:
+ if (remote_dev)
+ netdev_upper_dev_unlink(remote_dev, dev);
errout:
/* unregister_netdevice() destroys the default FDB entry with deletion
* notification. But the addition notification was not sent yet, so
@@@ -3971,12 -3932,11 +3971,12 @@@ static int vxlan_changelink(struct net_
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_rdst *dst = &vxlan->default_dst;
struct net_device *lowerdev;
struct vxlan_config conf;
+ struct vxlan_rdst *dst;
int err;
+ dst = &vxlan->default_dst;
err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
if (err)
return err;
@@@ -3986,14 -3946,6 +3986,14 @@@
if (err)
return err;
+ if (dst->remote_dev == lowerdev)
+ lowerdev = NULL;
+
+ err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
+ extack);
+ if (err)
+ return err;
+
/* handle default dst entry */
if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
@@@ -4010,8 -3962,6 +4010,8 @@@
NTF_SELF, true, extack);
if (err) {
spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ netdev_adjacent_change_abort(dst->remote_dev,
+ lowerdev, dev);
return err;
}
}
@@@ -4029,11 -3979,6 +4029,11 @@@
if (conf.age_interval != vxlan->cfg.age_interval)
mod_timer(&vxlan->age_timer, jiffies);
+ netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
+ if (lowerdev && lowerdev != dst->remote_dev) {
+ dst->remote_dev = lowerdev;
+ netdev_update_lockdep_key(lowerdev);
+ }
vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
return 0;
}
@@@ -4046,8 -3991,6 +4046,8 @@@ static void vxlan_dellink(struct net_de
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
+ if (vxlan->default_dst.remote_dev)
+ netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
}
static size_t vxlan_get_size(const struct net_device *dev)
diff --combined drivers/s390/net/qeth_core_main.c
index efcbe60220d1,5c6642b2415a..157281c70c3b
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@@ -901,30 -901,30 +901,30 @@@ static int qeth_get_problem(struct qeth
CCW_DEVID(cdev), dstat, cstat);
print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET,
16, 1, irb, 64, 1);
- return 1;
+ return -EIO;
}
if (dstat & DEV_STAT_UNIT_CHECK) {
if (sense[SENSE_RESETTING_EVENT_BYTE] &
SENSE_RESETTING_EVENT_FLAG) {
QETH_CARD_TEXT(card, 2, "REVIND");
- return 1;
+ return -EIO;
}
if (sense[SENSE_COMMAND_REJECT_BYTE] &
SENSE_COMMAND_REJECT_FLAG) {
QETH_CARD_TEXT(card, 2, "CMDREJi");
- return 1;
+ return -EIO;
}
if ((sense[2] == 0xaf) && (sense[3] == 0xfe)) {
QETH_CARD_TEXT(card, 2, "AFFE");
- return 1;
+ return -EIO;
}
if ((!sense[0]) && (!sense[1]) && (!sense[2]) && (!sense[3]))
{
QETH_CARD_TEXT(card, 2, "ZEROSEN");
return 0;
}
QETH_CARD_TEXT(card, 2, "DGENCHK");
- return 1;
+ return -EIO;
}
return 0;
}
@@@ -1513,6 -1513,7 +1513,6 @@@ int qeth_qdio_clear_card(struct qeth_ca
rc = qeth_clear_halt_card(card, use_halt);
if (rc)
QETH_CARD_TEXT_(card, 3, "2err%d", rc);
- card->state = CARD_STATE_DOWN;
return rc;
}
EXPORT_SYMBOL_GPL(qeth_qdio_clear_card);
@@@ -1956,7 -1957,6 +1956,7 @@@ static void qeth_idx_setup_activate_cmd
ccw_device_get_id(CARD_DDEV(card), &dev_id);
iob->finalize = qeth_idx_finalize_cmd;
+ port |= QETH_IDX_ACT_INVAL_FRAME;
memcpy(QETH_IDX_ACT_PNO(iob->data), &port, 1);
memcpy(QETH_IDX_ACT_ISSUER_RM_TOKEN(iob->data),
&card->token.issuer_rm_w, QETH_MPC_TOKEN_LENGTH);
@@@ -2634,18 -2634,6 +2634,18 @@@ static int qeth_init_input_buffer(struc
return 0;
}
+static unsigned int qeth_tx_select_bulk_max(struct qeth_card *card,
+ struct qeth_qdio_out_q *queue)
+{
+ if (!IS_IQD(card) ||
+ qeth_iqd_is_mcast_queue(card, queue) ||
+ card->options.cq == QETH_CQ_ENABLED ||
+ qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd))
+ return 1;
+
+ return card->ssqd.mmwc ? card->ssqd.mmwc : 1;
+}
+
int qeth_init_qdio_queues(struct qeth_card *card)
{
unsigned int i;
@@@ -2685,8 -2673,6 +2685,8 @@@
queue->do_pack = 0;
queue->prev_hdr = NULL;
queue->bulk_start = 0;
+ queue->bulk_count = 0;
+ queue->bulk_max = qeth_tx_select_bulk_max(card, queue);
atomic_set(&queue->used_buffers, 0);
atomic_set(&queue->set_pci_flags_count, 0);
atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
@@@ -3094,7 -3080,7 +3094,7 @@@ static int qeth_check_qdio_errors(struc
buf->element[14].sflags);
QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error);
if ((buf->element[15].sflags) == 0x12) {
- QETH_CARD_STAT_INC(card, rx_dropped);
+ QETH_CARD_STAT_INC(card, rx_fifo_errors);
return 0;
} else
return 1;
@@@ -3121,7 -3107,7 +3121,7 @@@ static void qeth_queue_input_buffer(str
for (i = queue->next_buf_to_init;
i < queue->next_buf_to_init + count; ++i) {
if (qeth_init_input_buffer(card,
- &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q])) {
+ &queue->bufs[QDIO_BUFNR(i)])) {
break;
} else {
newcount++;
@@@ -3163,8 -3149,8 +3163,8 @@@
if (rc) {
QETH_CARD_TEXT(card, 2, "qinberr");
}
- queue->next_buf_to_init = (queue->next_buf_to_init + count) %
- QDIO_MAX_BUFFERS_PER_Q;
+ queue->next_buf_to_init = QDIO_BUFNR(queue->next_buf_to_init +
+ count);
}
}
@@@ -3212,7 -3198,7 +3212,7 @@@ static int qeth_prep_flush_pack_buffer(
/* it's a packing buffer */
atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
queue->next_buf_to_fill =
- (queue->next_buf_to_fill + 1) % QDIO_MAX_BUFFERS_PER_Q;
+ QDIO_BUFNR(queue->next_buf_to_fill + 1);
return 1;
}
return 0;
@@@ -3266,8 -3252,7 +3266,8 @@@ static void qeth_flush_buffers(struct q
unsigned int qdio_flags;
for (i = index; i < index + count; ++i) {
- int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+ unsigned int bidx = QDIO_BUFNR(i);
+
buf = queue->bufs[bidx];
buf->buffer->element[buf->next_element_to_fill - 1].eflags |=
SBAL_EFLAGS_LAST_ENTRY;
@@@ -3333,11 -3318,10 +3333,11 @@@
static void qeth_flush_queue(struct qeth_qdio_out_q *queue)
{
- qeth_flush_buffers(queue, queue->bulk_start, 1);
+ qeth_flush_buffers(queue, queue->bulk_start, queue->bulk_count);
- queue->bulk_start = QDIO_BUFNR(queue->bulk_start + 1);
+ queue->bulk_start = QDIO_BUFNR(queue->bulk_start + queue->bulk_count);
queue->prev_hdr = NULL;
+ queue->bulk_count = 0;
}
static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
@@@ -3435,7 -3419,8 +3435,7 @@@ static void qeth_qdio_cq_handler(struc
}
for (i = first_element; i < first_element + count; ++i) {
- int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
- struct qdio_buffer *buffer = cq->qdio_bufs[bidx];
+ struct qdio_buffer *buffer = cq->qdio_bufs[QDIO_BUFNR(i)];
int e = 0;
while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
@@@ -3456,8 -3441,8 +3456,8 @@@
"QDIO reported an error, rc=%i\n", rc);
QETH_CARD_TEXT(card, 2, "qcqherr");
}
- card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init
- + count) % QDIO_MAX_BUFFERS_PER_Q;
+
+ cq->next_buf_to_init = QDIO_BUFNR(cq->next_buf_to_init + count);
}
static void qeth_qdio_input_handler(struct ccw_device *ccwdev,
@@@ -3483,6 -3468,7 +3483,6 @@@ static void qeth_qdio_output_handler(st
{
struct qeth_card *card = (struct qeth_card *) card_ptr;
struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
- struct qeth_qdio_out_buffer *buffer;
struct net_device *dev = card->dev;
struct netdev_queue *txq;
int i;
@@@ -3496,10 -3482,10 +3496,10 @@@
}
for (i = first_element; i < (first_element + count); ++i) {
- int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
- buffer = queue->bufs[bidx];
- qeth_handle_send_error(card, buffer, qdio_error);
- qeth_clear_output_buffer(queue, buffer, qdio_error, 0);
+ struct qeth_qdio_out_buffer *buf = queue->bufs[QDIO_BUFNR(i)];
+
+ qeth_handle_send_error(card, buf, qdio_error);
+ qeth_clear_output_buffer(queue, buf, qdio_error, 0);
}
atomic_sub(count, &queue->used_buffers);
@@@ -3694,10 -3680,10 +3694,10 @@@ check_layout
}
static bool qeth_iqd_may_bulk(struct qeth_qdio_out_q *queue,
- struct qeth_qdio_out_buffer *buffer,
struct sk_buff *curr_skb,
struct qeth_hdr *curr_hdr)
{
+ struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start];
struct qeth_hdr *prev_hdr = queue->prev_hdr;
if (!prev_hdr)
@@@ -3817,14 -3803,13 +3817,14 @@@ static int __qeth_xmit(struct qeth_car
struct qeth_hdr *hdr, unsigned int offset,
unsigned int hd_len)
{
- struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start];
unsigned int bytes = qdisc_pkt_len(skb);
+ struct qeth_qdio_out_buffer *buffer;
unsigned int next_element;
struct netdev_queue *txq;
bool stopped = false;
bool flush;
+ buffer = queue->bufs[QDIO_BUFNR(queue->bulk_start + queue->bulk_count)];
txq = netdev_get_tx_queue(card->dev, skb_get_queue_mapping(skb));
/* Just a sanity check, the wake/stop logic should ensure that we always
@@@ -3833,23 -3818,11 +3833,23 @@@
if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
return -EBUSY;
- if ((buffer->next_element_to_fill + elements > queue->max_elements) ||
- !qeth_iqd_may_bulk(queue, buffer, skb, hdr)) {
- atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
- qeth_flush_queue(queue);
- buffer = queue->bufs[queue->bulk_start];
+ flush = !qeth_iqd_may_bulk(queue, skb, hdr);
+
+ if (flush ||
+ (buffer->next_element_to_fill + elements > queue->max_elements)) {
+ if (buffer->next_element_to_fill > 0) {
+ atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
+ queue->bulk_count++;
+ }
+
+ if (queue->bulk_count >= queue->bulk_max)
+ flush = true;
+
+ if (flush)
+ qeth_flush_queue(queue);
+
+ buffer = queue->bufs[QDIO_BUFNR(queue->bulk_start +
+ queue->bulk_count)];
/* Sanity-check again: */
if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
@@@ -3875,13 -3848,7 +3875,13 @@@
if (flush || next_element >= queue->max_elements) {
atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
- qeth_flush_queue(queue);
+ queue->bulk_count++;
+
+ if (queue->bulk_count >= queue->bulk_max)
+ flush = true;
+
+ if (flush)
+ qeth_flush_queue(queue);
}
if (stopped && !qeth_out_queue_is_full(queue))
@@@ -3931,7 -3898,8 +3931,7 @@@ int qeth_do_send_packet(struct qeth_car
atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
flush_count++;
queue->next_buf_to_fill =
- (queue->next_buf_to_fill + 1) %
- QDIO_MAX_BUFFERS_PER_Q;
+ QDIO_BUFNR(queue->next_buf_to_fill + 1);
buffer = queue->bufs[queue->next_buf_to_fill];
/* We stepped forward, so sanity-check again: */
@@@ -3964,8 -3932,8 +3964,8 @@@
if (!queue->do_pack || stopped || next_element >= queue->max_elements) {
flush_count++;
atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED);
- queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
- QDIO_MAX_BUFFERS_PER_Q;
+ queue->next_buf_to_fill =
+ QDIO_BUFNR(queue->next_buf_to_fill + 1);
}
if (flush_count)
@@@ -4293,6 -4261,7 +4293,6 @@@ int qeth_set_access_ctrl_online(struct
}
return rc;
}
-EXPORT_SYMBOL_GPL(qeth_set_access_ctrl_online);
void qeth_tx_timeout(struct net_device *dev)
{
@@@ -4347,9 -4316,7 +4347,9 @@@ static int qeth_mdio_read(struct net_de
case MII_NWAYTEST: /* N-way auto-neg test register */
break;
case MII_RERRCOUNTER: /* rx error counter */
- rc = card->stats.rx_errors;
+ rc = card->stats.rx_length_errors +
+ card->stats.rx_frame_errors +
+ card->stats.rx_fifo_errors;
break;
case MII_SREVISION: /* silicon revision */
break;
@@@ -4748,7 -4715,7 +4748,7 @@@ static int qeth_qdio_establish(struct q
QETH_CARD_TEXT(card, 2, "qdioest");
- qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL);
+ qib_param_field = kzalloc(sizeof_member(struct qib, parm), GFP_KERNEL);
if (!qib_param_field) {
rc = -ENOMEM;
goto out_free_nothing;
@@@ -4855,6 -4822,7 +4855,6 @@@ static void qeth_core_free_card(struct
qeth_clean_channel(&card->data);
qeth_put_cmd(card->read_cmd);
destroy_workqueue(card->event_wq);
- qeth_free_qdio_queues(card);
unregister_service_level(&card->qeth_service_level);
dev_set_drvdata(&card->gdev->dev, NULL);
kfree(card);
@@@ -5009,15 -4977,6 +5009,15 @@@ retriable
goto out;
}
}
+
+ if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) ||
+ (card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM)))
+ card->info.hwtrap = 0;
+
+ rc = qeth_set_access_ctrl_online(card, 0);
+ if (rc)
+ goto out;
+
return 0;
out:
dev_warn(&card->gdev->dev, "The qeth device driver failed to recover "
@@@ -5064,14 -5023,13 +5064,14 @@@ struct sk_buff *qeth_core_get_next_skb(
struct qdio_buffer_element *element = *__element;
struct qdio_buffer *buffer = qethbuffer->buffer;
int offset = *__offset;
+ bool use_rx_sg = false;
+ unsigned int headroom;
struct sk_buff *skb;
int skb_len = 0;
void *data_ptr;
int data_len;
- int headroom = 0;
- int use_rx_sg = 0;
+next_packet:
/* qeth_hdr must not cross element boundaries */
while (element->length < offset + sizeof(struct qeth_hdr)) {
if (qeth_is_last_sbale(element))
@@@ -5085,45 -5043,27 +5085,45 @@@
switch ((*hdr)->hdr.l2.id) {
case QETH_HEADER_TYPE_LAYER2:
skb_len = (*hdr)->hdr.l2.pkt_length;
+ headroom = 0;
break;
case QETH_HEADER_TYPE_LAYER3:
skb_len = (*hdr)->hdr.l3.length;
+ if (!IS_LAYER3(card)) {
+ QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
+ skb = NULL;
+ goto walk_packet;
+ }
+
headroom = ETH_HLEN;
break;
case QETH_HEADER_TYPE_OSN:
skb_len = (*hdr)->hdr.osn.pdu_length;
+ if (!IS_OSN(card)) {
+ QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
+ skb = NULL;
+ goto walk_packet;
+ }
+
headroom = sizeof(struct qeth_hdr);
break;
default:
- break;
+ if ((*hdr)->hdr.l2.id & QETH_HEADER_MASK_INVAL)
+ QETH_CARD_STAT_INC(card, rx_frame_errors);
+ else
+ QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
+
+ /* Can't determine packet length, drop the whole buffer. */
+ return NULL;
}
if (!skb_len)
return NULL;
- if (((skb_len >= card->options.rx_sg_cb) &&
- !IS_OSN(card) &&
- (!atomic_read(&card->force_alloc_skb))) ||
- (card->options.cq == QETH_CQ_ENABLED))
- use_rx_sg = 1;
+ use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) ||
+ ((skb_len >= card->options.rx_sg_cb) &&
+ !atomic_read(&card->force_alloc_skb) &&
+ !IS_OSN(card));
if (use_rx_sg && qethbuffer->rx_skb) {
/* QETH_CQ_ENABLED only: */
@@@ -5134,18 -5074,15 +5134,18 @@@
skb = napi_alloc_skb(&card->napi, linear + headroom);
}
+
if (!skb)
- goto no_mem;
- if (headroom)
+ QETH_CARD_STAT_INC(card, rx_dropped_nomem);
+ else if (headroom)
skb_reserve(skb, headroom);
+walk_packet:
data_ptr = element->addr + offset;
while (skb_len) {
data_len = min(skb_len, (int)(element->length - offset));
- if (data_len) {
+
+ if (skb && data_len) {
if (use_rx_sg)
qeth_create_skb_frag(element, skb, offset,
data_len);
@@@ -5157,11 -5094,8 +5157,11 @@@
if (qeth_is_last_sbale(element)) {
QETH_CARD_TEXT(card, 4, "unexeob");
QETH_CARD_HEX(card, 2, buffer, sizeof(void *));
- dev_kfree_skb_any(skb);
- QETH_CARD_STAT_INC(card, rx_errors);
+ if (skb) {
+ dev_kfree_skb_any(skb);
+ QETH_CARD_STAT_INC(card,
+ rx_length_errors);
+ }
return NULL;
}
element++;
@@@ -5171,11 -5105,6 +5171,11 @@@
offset += data_len;
}
}
+
+ /* This packet was skipped, go get another one: */
+ if (!skb)
+ goto next_packet;
+
*__element = element;
*__offset = offset;
if (use_rx_sg) {
@@@ -5184,6 -5113,12 +5184,6 @@@
skb_shinfo(skb)->nr_frags);
}
return skb;
-no_mem:
- if (net_ratelimit()) {
- QETH_CARD_TEXT(card, 2, "noskbmem");
- }
- QETH_CARD_STAT_INC(card, rx_dropped);
- return NULL;
}
EXPORT_SYMBOL_GPL(qeth_core_get_next_skb);
@@@ -5230,7 -5165,8 +5230,7 @@@ int qeth_poll(struct napi_struct *napi
card->rx.b_count--;
if (card->rx.b_count) {
card->rx.b_index =
- (card->rx.b_index + 1) %
- QDIO_MAX_BUFFERS_PER_Q;
+ QDIO_BUFNR(card->rx.b_index + 1);
card->rx.b_element =
&card->qdio.in_q
->bufs[card->rx.b_index]
@@@ -5246,9 -5182,9 +5246,9 @@@
}
}
- napi_complete_done(napi, work_done);
- if (qdio_start_irq(card->data.ccwdev, 0))
- napi_schedule(&card->napi);
+ if (napi_complete_done(napi, work_done) &&
+ qdio_start_irq(CARD_DDEV(card), 0))
+ napi_schedule(napi);
out:
return work_done;
}
@@@ -5767,8 -5703,6 +5767,8 @@@ static void qeth_core_remove_device(str
qeth_core_free_discipline(card);
}
+ qeth_free_qdio_queues(card);
+
free_netdev(card->dev);
qeth_core_free_card(card);
put_device(&gdev->dev);
@@@ -6264,15 -6198,9 +6264,15 @@@ void qeth_get_stats64(struct net_devic
stats->rx_packets = card->stats.rx_packets;
stats->rx_bytes = card->stats.rx_bytes;
- stats->rx_errors = card->stats.rx_errors;
- stats->rx_dropped = card->stats.rx_dropped;
+ stats->rx_errors = card->stats.rx_length_errors +
+ card->stats.rx_frame_errors +
+ card->stats.rx_fifo_errors;
+ stats->rx_dropped = card->stats.rx_dropped_nomem +
+ card->stats.rx_dropped_notsupp;
stats->multicast = card->stats.rx_multicast;
+ stats->rx_length_errors = card->stats.rx_length_errors;
+ stats->rx_frame_errors = card->stats.rx_frame_errors;
+ stats->rx_fifo_errors = card->stats.rx_fifo_errors;
for (i = 0; i < card->qdio.no_out_queues; i++) {
queue = card->qdio.out_qs[i];
diff --combined drivers/s390/net/qeth_core_mpc.h
index 53fcf6641154,62aebf5cf4ef..60d2289cd819
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@@ -11,7 -11,6 +11,7 @@@
#include <asm/qeth.h>
#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in6.h>
#define IPA_PDU_HEADER_SIZE 0x40
#define QETH_IPA_PDU_LEN_TOTAL(buffer) (buffer + 0x0e)
@@@ -366,7 -365,8 +366,7 @@@ struct qeth_ipacmd_setdelip6
struct qeth_ipacmd_setdelipm {
__u8 mac[6];
__u8 padding[2];
- __u8 ip6[12];
- __u8 ip4[4];
+ struct in6_addr ip;
} __attribute__ ((packed));
struct qeth_ipacmd_layer2setdelmac {
@@@ -435,7 -435,7 +435,7 @@@ struct qeth_ipacmd_setassparms
} data;
} __attribute__ ((packed));
- #define SETASS_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setassparms,\
+ #define SETASS_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_setassparms,\
data.field)
/* SETRTG IPA Command: ****************************************************/
@@@ -549,7 -549,7 +549,7 @@@ struct qeth_ipacmd_setadpparms
} data;
} __attribute__ ((packed));
- #define SETADP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setadpparms,\
+ #define SETADP_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_setadpparms,\
data.field)
/* CREATE_ADDR IPA Command: ***********************************************/
@@@ -662,7 -662,7 +662,7 @@@ struct qeth_ipacmd_vnicc
} data;
};
- #define VNICC_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_vnicc,\
+ #define VNICC_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_vnicc,\
data.field)
/* SETBRIDGEPORT IPA Command: *********************************************/
@@@ -743,7 -743,7 +743,7 @@@ struct qeth_ipacmd_setbridgeport
} data;
} __packed;
- #define SBP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setbridgeport,\
+ #define SBP_DATA_SIZEOF(field) sizeof_member(struct qeth_ipacmd_setbridgeport,\
data.field)
/* ADDRESS_CHANGE_NOTIFICATION adapter-initiated "command" *******************/
@@@ -804,7 -804,7 +804,7 @@@ struct qeth_ipa_cmd
} data;
} __attribute__ ((packed));
- #define IPA_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipa_cmd, data.field)
+ #define IPA_DATA_SIZEOF(field) sizeof_member(struct qeth_ipa_cmd, data.field)
/*
* special command for ARP processing.
@@@ -900,7 -900,6 +900,7 @@@ extern unsigned char IDX_ACTIVATE_WRITE
#define IDX_ACTIVATE_SIZE 0x22
#define QETH_IDX_ACT_PNO(buffer) (buffer+0x0b)
#define QETH_IDX_ACT_ISSUER_RM_TOKEN(buffer) (buffer + 0x0c)
+#define QETH_IDX_ACT_INVAL_FRAME 0x40
#define QETH_IDX_NO_PORTNAME_REQUIRED(buffer) ((buffer)[0x0b] & 0x80)
#define QETH_IDX_ACT_FUNC_LEVEL(buffer) (buffer + 0x10)
#define QETH_IDX_ACT_DATASET_NAME(buffer) (buffer + 0x16)
diff --combined drivers/scsi/aacraid/aachba.c
index e36608ce937a,f30fe29b5701..3b232b6a3eff
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@@ -535,7 -535,7 +535,7 @@@ static void get_container_name_callback
if ((le32_to_cpu(get_name_reply->status) == CT_OK)
&& (get_name_reply->data[0] != '\0')) {
char *sp = get_name_reply->data;
- int data_size = FIELD_SIZEOF(struct aac_get_name_resp, data);
+ int data_size = sizeof_member(struct aac_get_name_resp, data);
sp[data_size - 1] = '\0';
while (*sp == ' ')
@@@ -574,7 -574,7 +574,7 @@@ static int aac_get_container_name(struc
dev = (struct aac_dev *)scsicmd->device->host->hostdata;
- data_size = FIELD_SIZEOF(struct aac_get_name_resp, data);
+ data_size = sizeof_member(struct aac_get_name_resp, data);
cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd);
@@@ -1477,7 -1477,6 +1477,7 @@@ static struct aac_srb * aac_scsi_common
struct aac_srb * srbcmd;
u32 flag;
u32 timeout;
+ struct aac_dev *dev = fib->dev;
aac_fib_init(fib);
switch(cmd->sc_data_direction){
@@@ -1504,7 -1503,7 +1504,7 @@@
srbcmd->flags = cpu_to_le32(flag);
timeout = cmd->request->timeout/HZ;
if (timeout == 0)
- timeout = 1;
+ timeout = (dev->sa_firmware ? AAC_SA_TIMEOUT : AAC_ARC_TIMEOUT);
srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds
srbcmd->retry_limit = 0; /* Obsolete parameter */
srbcmd->cdb_size = cpu_to_le32(cmd->cmd_len);
@@@ -2468,13 -2467,13 +2468,13 @@@ static int aac_read(struct scsi_cmnd *
scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 |
SAM_STAT_CHECK_CONDITION;
set_sense(&dev->fsa_dev[cid].sense_data,
- HARDWARE_ERROR, SENCODE_INTERNAL_TARGET_FAILURE,
+ ILLEGAL_REQUEST, SENCODE_LBA_OUT_OF_RANGE,
ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0);
memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
min_t(size_t, sizeof(dev->fsa_dev[cid].sense_data),
SCSI_SENSE_BUFFERSIZE));
scsicmd->scsi_done(scsicmd);
- return 1;
+ return 0;
}
dprintk((KERN_DEBUG "aac_read[cpu %d]: lba = %llu, t = %ld.\n",
@@@ -2560,13 -2559,13 +2560,13 @@@ static int aac_write(struct scsi_cmnd
scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 |
SAM_STAT_CHECK_CONDITION;
set_sense(&dev->fsa_dev[cid].sense_data,
- HARDWARE_ERROR, SENCODE_INTERNAL_TARGET_FAILURE,
+ ILLEGAL_REQUEST, SENCODE_LBA_OUT_OF_RANGE,
ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0);
memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
min_t(size_t, sizeof(dev->fsa_dev[cid].sense_data),
SCSI_SENSE_BUFFERSIZE));
scsicmd->scsi_done(scsicmd);
- return 1;
+ return 0;
}
dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n",
diff --combined drivers/scsi/cxgbi/libcxgbi.c
index 0d044c165960,cf261e1ae9d4..e7143b257117
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@@ -2284,6 -2284,34 +2284,6 @@@ int cxgbi_set_conn_param(struct iscsi_c
}
EXPORT_SYMBOL_GPL(cxgbi_set_conn_param);
-static inline int csk_print_port(struct cxgbi_sock *csk, char *buf)
-{
- int len;
-
- cxgbi_sock_get(csk);
- len = sprintf(buf, "%hu\n", ntohs(csk->daddr.sin_port));
- cxgbi_sock_put(csk);
-
- return len;
-}
-
-static inline int csk_print_ip(struct cxgbi_sock *csk, char *buf)
-{
- int len;
-
- cxgbi_sock_get(csk);
- if (csk->csk_family == AF_INET)
- len = sprintf(buf, "%pI4",
- &csk->daddr.sin_addr.s_addr);
- else
- len = sprintf(buf, "%pI6",
- &csk->daddr6.sin6_addr);
-
- cxgbi_sock_put(csk);
-
- return len;
-}
-
int cxgbi_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param,
char *buf)
{
@@@ -2746,7 -2774,7 +2746,7 @@@ static int __init libcxgbi_init_module(
{
pr_info("%s", version);
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) <
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, cb) <
sizeof(struct cxgbi_skb_cb));
return 0;
}
diff --combined drivers/scsi/smartpqi/smartpqi_init.c
index 7b7ef3acb504,fafa4c1c2fea..e1499626cbbd
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@@ -33,11 -33,11 +33,11 @@@
#define BUILD_TIMESTAMP
#endif
-#define DRIVER_VERSION "1.2.8-026"
+#define DRIVER_VERSION "1.2.10-025"
#define DRIVER_MAJOR 1
#define DRIVER_MINOR 2
-#define DRIVER_RELEASE 8
-#define DRIVER_REVISION 26
+#define DRIVER_RELEASE 10
+#define DRIVER_REVISION 25
#define DRIVER_NAME "Microsemi PQI Driver (v" \
DRIVER_VERSION BUILD_TIMESTAMP ")"
@@@ -211,11 -211,6 +211,11 @@@ static inline bool pqi_is_external_raid
return scsi3addr[2] != 0;
}
+static inline bool pqi_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
+{
+ return !ctrl_info->controller_online;
+}
+
static inline void pqi_check_ctrl_health(struct pqi_ctrl_info *ctrl_info)
{
if (ctrl_info->controller_online)
@@@ -240,21 -235,6 +240,21 @@@ static inline void pqi_save_ctrl_mode(s
sis_write_driver_scratch(ctrl_info, mode);
}
+static inline void pqi_ctrl_block_device_reset(struct pqi_ctrl_info *ctrl_info)
+{
+ ctrl_info->block_device_reset = true;
+}
+
+static inline bool pqi_device_reset_blocked(struct pqi_ctrl_info *ctrl_info)
+{
+ return ctrl_info->block_device_reset;
+}
+
+static inline bool pqi_ctrl_blocked(struct pqi_ctrl_info *ctrl_info)
+{
+ return ctrl_info->block_requests;
+}
+
static inline void pqi_ctrl_block_requests(struct pqi_ctrl_info *ctrl_info)
{
ctrl_info->block_requests = true;
@@@ -351,16 -331,6 +351,16 @@@ static inline bool pqi_device_in_remove
return device->in_remove && !ctrl_info->in_shutdown;
}
+static inline void pqi_ctrl_shutdown_start(struct pqi_ctrl_info *ctrl_info)
+{
+ ctrl_info->in_shutdown = true;
+}
+
+static inline bool pqi_ctrl_in_shutdown(struct pqi_ctrl_info *ctrl_info)
+{
+ return ctrl_info->in_shutdown;
+}
+
static inline void pqi_schedule_rescan_worker_with_delay(
struct pqi_ctrl_info *ctrl_info, unsigned long delay)
{
@@@ -390,11 -360,6 +390,11 @@@ static inline void pqi_cancel_rescan_wo
cancel_delayed_work_sync(&ctrl_info->rescan_work);
}
+static inline void pqi_cancel_event_worker(struct pqi_ctrl_info *ctrl_info)
+{
+ cancel_work_sync(&ctrl_info->event_work);
+}
+
static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info)
{
if (!ctrl_info->heartbeat_counter)
@@@ -412,7 -377,7 +412,7 @@@ static inline u8 pqi_read_soft_reset_st
}
static inline void pqi_clear_soft_reset_status(struct pqi_ctrl_info *ctrl_info,
- u8 clear)
+ u8 clear)
{
u8 status;
@@@ -497,9 -462,9 +497,9 @@@ static int pqi_build_raid_path_request(
request->data_direction = SOP_READ_FLAG;
cdb[0] = cmd;
if (cmd == CISS_REPORT_PHYS)
- cdb[1] = CISS_REPORT_PHYS_EXTENDED;
+ cdb[1] = CISS_REPORT_PHYS_FLAG_OTHER;
else
- cdb[1] = CISS_REPORT_LOG_EXTENDED;
+ cdb[1] = CISS_REPORT_LOG_FLAG_UNIQUE_LUN_ID;
put_unaligned_be32(cdb_length, &cdb[6]);
break;
case CISS_GET_RAID_MAP:
@@@ -602,12 -567,13 +602,12 @@@ static void pqi_free_io_request(struct
}
static int pqi_send_scsi_raid_request(struct pqi_ctrl_info *ctrl_info, u8 cmd,
- u8 *scsi3addr, void *buffer, size_t buffer_length, u16 vpd_page,
- struct pqi_raid_error_info *error_info,
- unsigned long timeout_msecs)
+ u8 *scsi3addr, void *buffer, size_t buffer_length, u16 vpd_page,
+ struct pqi_raid_error_info *error_info, unsigned long timeout_msecs)
{
int rc;
- enum dma_data_direction dir;
struct pqi_raid_path_request request;
+ enum dma_data_direction dir;
rc = pqi_build_raid_path_request(ctrl_info, &request,
cmd, scsi3addr, buffer,
@@@ -615,44 -581,44 +615,44 @@@
if (rc)
return rc;
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
- 0, error_info, timeout_msecs);
+ rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0,
+ error_info, timeout_msecs);
pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
+
return rc;
}
-/* Helper functions for pqi_send_scsi_raid_request */
+/* helper functions for pqi_send_scsi_raid_request */
static inline int pqi_send_ctrl_raid_request(struct pqi_ctrl_info *ctrl_info,
- u8 cmd, void *buffer, size_t buffer_length)
+ u8 cmd, void *buffer, size_t buffer_length)
{
return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID,
- buffer, buffer_length, 0, NULL, NO_TIMEOUT);
+ buffer, buffer_length, 0, NULL, NO_TIMEOUT);
}
static inline int pqi_send_ctrl_raid_with_error(struct pqi_ctrl_info *ctrl_info,
- u8 cmd, void *buffer, size_t buffer_length,
- struct pqi_raid_error_info *error_info)
+ u8 cmd, void *buffer, size_t buffer_length,
+ struct pqi_raid_error_info *error_info)
{
return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID,
- buffer, buffer_length, 0, error_info, NO_TIMEOUT);
+ buffer, buffer_length, 0, error_info, NO_TIMEOUT);
}
-
static inline int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info,
- struct bmic_identify_controller *buffer)
+ struct bmic_identify_controller *buffer)
{
return pqi_send_ctrl_raid_request(ctrl_info, BMIC_IDENTIFY_CONTROLLER,
- buffer, sizeof(*buffer));
+ buffer, sizeof(*buffer));
}
static inline int pqi_sense_subsystem_info(struct pqi_ctrl_info *ctrl_info,
- struct bmic_sense_subsystem_info *sense_info)
+ struct bmic_sense_subsystem_info *sense_info)
{
return pqi_send_ctrl_raid_request(ctrl_info,
- BMIC_SENSE_SUBSYSTEM_INFORMATION,
- sense_info, sizeof(*sense_info));
+ BMIC_SENSE_SUBSYSTEM_INFORMATION, sense_info,
+ sizeof(*sense_info));
}
static inline int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info,
@@@ -662,9 -628,83 +662,9 @@@
buffer, buffer_length, vpd_page, NULL, NO_TIMEOUT);
}
-static bool pqi_vpd_page_supported(struct pqi_ctrl_info *ctrl_info,
- u8 *scsi3addr, u16 vpd_page)
-{
- int rc;
- int i;
- int pages;
- unsigned char *buf, bufsize;
-
- buf = kzalloc(256, GFP_KERNEL);
- if (!buf)
- return false;
-
- /* Get the size of the page list first */
- rc = pqi_scsi_inquiry(ctrl_info, scsi3addr,
- VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES,
- buf, SCSI_VPD_HEADER_SZ);
- if (rc != 0)
- goto exit_unsupported;
-
- pages = buf[3];
- if ((pages + SCSI_VPD_HEADER_SZ) <= 255)
- bufsize = pages + SCSI_VPD_HEADER_SZ;
- else
- bufsize = 255;
-
- /* Get the whole VPD page list */
- rc = pqi_scsi_inquiry(ctrl_info, scsi3addr,
- VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES,
- buf, bufsize);
- if (rc != 0)
- goto exit_unsupported;
-
- pages = buf[3];
- for (i = 1; i <= pages; i++)
- if (buf[3 + i] == vpd_page)
- goto exit_supported;
-
-exit_unsupported:
- kfree(buf);
- return false;
-
-exit_supported:
- kfree(buf);
- return true;
-}
-
-static int pqi_get_device_id(struct pqi_ctrl_info *ctrl_info,
- u8 *scsi3addr, u8 *device_id, int buflen)
-{
- int rc;
- unsigned char *buf;
-
- if (!pqi_vpd_page_supported(ctrl_info, scsi3addr, SCSI_VPD_DEVICE_ID))
- return 1; /* function not supported */
-
- buf = kzalloc(64, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- rc = pqi_scsi_inquiry(ctrl_info, scsi3addr,
- VPD_PAGE | SCSI_VPD_DEVICE_ID,
- buf, 64);
- if (rc == 0) {
- if (buflen > 16)
- buflen = 16;
- memcpy(device_id, &buf[SCSI_VPD_DEVICE_ID_IDX], buflen);
- }
-
- kfree(buf);
-
- return rc;
-}
-
static int pqi_identify_physical_device(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device,
- struct bmic_identify_physical_device *buffer,
- size_t buffer_length)
+ struct bmic_identify_physical_device *buffer, size_t buffer_length)
{
int rc;
enum dma_data_direction dir;
@@@ -685,7 -725,6 +685,7 @@@
0, NULL, NO_TIMEOUT);
pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
+
return rc;
}
@@@ -724,7 -763,7 +724,7 @@@ int pqi_csmi_smp_passthru(struct pqi_ct
buffer, buffer_length, error_info);
}
-#define PQI_FETCH_PTRAID_DATA (1UL<<31)
+#define PQI_FETCH_PTRAID_DATA (1 << 31)
static int pqi_set_diag_rescan(struct pqi_ctrl_info *ctrl_info)
{
@@@ -736,15 -775,14 +736,15 @@@
return -ENOMEM;
rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SENSE_DIAG_OPTIONS,
- diag, sizeof(*diag));
+ diag, sizeof(*diag));
if (rc)
goto out;
diag->options |= cpu_to_le32(PQI_FETCH_PTRAID_DATA);
- rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SET_DIAG_OPTIONS,
- diag, sizeof(*diag));
+ rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SET_DIAG_OPTIONS, diag,
+ sizeof(*diag));
+
out:
kfree(diag);
@@@ -755,7 -793,7 +755,7 @@@ static inline int pqi_write_host_wellne
void *buffer, size_t buffer_length)
{
return pqi_send_ctrl_raid_request(ctrl_info, BMIC_WRITE_HOST_WELLNESS,
- buffer, buffer_length);
+ buffer, buffer_length);
}
#pragma pack(1)
@@@ -908,7 -946,7 +908,7 @@@ static inline int pqi_report_luns(struc
void *buffer, size_t buffer_length)
{
return pqi_send_ctrl_raid_request(ctrl_info, cmd, buffer,
- buffer_length);
+ buffer_length);
}
static int pqi_report_phys_logical_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd,
@@@ -1242,9 -1280,9 +1242,9 @@@ static void pqi_get_raid_bypass_status(
if (rc)
goto out;
-#define RAID_BYPASS_STATUS 4
-#define RAID_BYPASS_CONFIGURED 0x1
-#define RAID_BYPASS_ENABLED 0x2
+#define RAID_BYPASS_STATUS 4
+#define RAID_BYPASS_CONFIGURED 0x1
+#define RAID_BYPASS_ENABLED 0x2
bypass_status = buffer[RAID_BYPASS_STATUS];
device->raid_bypass_configured =
@@@ -1347,6 -1385,14 +1347,6 @@@ static int pqi_get_device_info(struct p
}
}
- if (pqi_get_device_id(ctrl_info, device->scsi3addr,
- device->unique_id, sizeof(device->unique_id)) < 0)
- dev_warn(&ctrl_info->pci_dev->dev,
- "Can't get device id for scsi %d:%d:%d:%d\n",
- ctrl_info->scsi_host->host_no,
- device->bus, device->target,
- device->lun);
-
out:
kfree(buffer);
@@@ -1367,7 -1413,6 +1367,7 @@@ static void pqi_get_physical_disk_info(
device->queue_depth = PQI_PHYSICAL_DISK_DEFAULT_MAX_QUEUE_DEPTH;
return;
}
+
device->box_index = id_phys->box_index;
device->phys_box_on_bus = id_phys->phys_box_on_bus;
device->phy_connected_dev_type = id_phys->phy_connected_dev_type[0];
@@@ -1783,7 -1828,7 +1783,7 @@@ static void pqi_update_device_list(stru
device = new_device_list[i];
find_result = pqi_scsi_find_entry(ctrl_info, device,
- &matching_device);
+ &matching_device);
switch (find_result) {
case DEVICE_SAME:
@@@ -2012,8 -2057,9 +2012,8 @@@ static int pqi_update_scsi_devices(stru
rc = -ENOMEM;
goto out;
}
- if (pqi_hide_vsep) {
- int i;
+ if (pqi_hide_vsep) {
for (i = num_physicals - 1; i >= 0; i--) {
phys_lun_ext_entry =
&physdev_list->lun_entries[i];
@@@ -2086,7 -2132,7 +2086,7 @@@
device->is_physical_device = is_physical_device;
if (is_physical_device) {
if (phys_lun_ext_entry->device_type ==
- SA_EXPANDER_SMP_DEVICE)
+ SA_DEVICE_TYPE_EXPANDER_SMP)
device->is_expander_smp_device = true;
} else {
device->is_external_raid_device =
@@@ -2123,13 -2169,16 +2123,13 @@@
if (device->is_physical_device) {
device->wwid = phys_lun_ext_entry->wwid;
if ((phys_lun_ext_entry->device_flags &
- REPORT_PHYS_LUN_DEV_FLAG_AIO_ENABLED) &&
+ CISS_REPORT_PHYS_DEV_FLAG_AIO_ENABLED) &&
phys_lun_ext_entry->aio_handle) {
device->aio_enabled = true;
- device->aio_handle =
- phys_lun_ext_entry->aio_handle;
+ device->aio_handle =
+ phys_lun_ext_entry->aio_handle;
}
-
- pqi_get_physical_disk_info(ctrl_info,
- device, id_phys);
-
+ pqi_get_physical_disk_info(ctrl_info, device, id_phys);
} else {
memcpy(device->volume_id, log_lun_ext_entry->volume_id,
sizeof(device->volume_id));
@@@ -3109,7 -3158,7 +3109,7 @@@ static enum pqi_soft_reset_status pqi_p
}
static void pqi_process_soft_reset(struct pqi_ctrl_info *ctrl_info,
- enum pqi_soft_reset_status reset_status)
+ enum pqi_soft_reset_status reset_status)
{
int rc;
@@@ -3153,8 -3202,8 +3153,8 @@@ static void pqi_ofa_process_event(struc
if (event_id == PQI_EVENT_OFA_QUIESCE) {
dev_info(&ctrl_info->pci_dev->dev,
- "Received Online Firmware Activation quiesce event for controller %u\n",
- ctrl_info->ctrl_id);
+ "Received Online Firmware Activation quiesce event for controller %u\n",
+ ctrl_info->ctrl_id);
pqi_ofa_ctrl_quiesce(ctrl_info);
pqi_acknowledge_event(ctrl_info, event);
if (ctrl_info->soft_reset_handshake_supported) {
@@@ -3174,8 -3223,8 +3174,8 @@@
pqi_ofa_free_host_buffer(ctrl_info);
pqi_acknowledge_event(ctrl_info, event);
dev_info(&ctrl_info->pci_dev->dev,
- "Online Firmware Activation(%u) cancel reason : %u\n",
- ctrl_info->ctrl_id, event->ofa_cancel_reason);
+ "Online Firmware Activation(%u) cancel reason : %u\n",
+ ctrl_info->ctrl_id, event->ofa_cancel_reason);
}
mutex_unlock(&ctrl_info->ofa_mutex);
@@@ -3354,7 -3403,7 +3354,7 @@@ static unsigned int pqi_process_event_i
#define PQI_LEGACY_INTX_MASK 0x1
static inline void pqi_configure_legacy_intx(struct pqi_ctrl_info *ctrl_info,
- bool enable_intx)
+ bool enable_intx)
{
u32 intx_mask;
struct pqi_device_registers __iomem *pqi_registers;
@@@ -3792,7 -3841,7 +3792,7 @@@ static int pqi_create_admin_queues(stru
&pqi_registers->admin_oq_pi_addr);
reg = PQI_ADMIN_IQ_NUM_ELEMENTS |
- (PQI_ADMIN_OQ_NUM_ELEMENTS) << 8 |
+ (PQI_ADMIN_OQ_NUM_ELEMENTS << 8) |
(admin_queues->int_msg_num << 16);
writel(reg, &pqi_registers->admin_iq_num_elements);
writel(PQI_CREATE_ADMIN_QUEUE_PAIR,
@@@ -3999,8 -4048,8 +3999,8 @@@ static void pqi_raid_synchronous_comple
complete(waiting);
}
-static int pqi_process_raid_io_error_synchronous(struct pqi_raid_error_info
- *error_info)
+static int pqi_process_raid_io_error_synchronous(
+ struct pqi_raid_error_info *error_info)
{
int rc = -EIO;
@@@ -4073,8 -4122,6 +4073,8 @@@ static int pqi_submit_raid_request_sync
goto out;
}
+ atomic_inc(&ctrl_info->sync_cmds_outstanding);
+
io_request = pqi_alloc_io_request(ctrl_info);
put_unaligned_le16(io_request->index,
@@@ -4121,7 -4168,6 +4121,7 @@@
pqi_free_io_request(io_request);
+ atomic_dec(&ctrl_info->sync_cmds_outstanding);
out:
up(&ctrl_info->sync_request_sem);
@@@ -4619,11 -4665,11 +4619,11 @@@ static void pqi_free_all_io_requests(st
static inline int pqi_alloc_error_buffer(struct pqi_ctrl_info *ctrl_info)
{
- ctrl_info->error_buffer = dma_alloc_coherent(&ctrl_info->pci_dev->dev,
- ctrl_info->error_buffer_length,
- &ctrl_info->error_buffer_dma_handle,
- GFP_KERNEL);
+ ctrl_info->error_buffer = dma_alloc_coherent(&ctrl_info->pci_dev->dev,
+ ctrl_info->error_buffer_length,
+ &ctrl_info->error_buffer_dma_handle,
+ GFP_KERNEL);
if (!ctrl_info->error_buffer)
return -ENOMEM;
@@@ -5356,7 -5402,7 +5356,7 @@@ static int pqi_scsi_queue_command(struc
pqi_ctrl_busy(ctrl_info);
if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device) ||
- pqi_ctrl_in_ofa(ctrl_info)) {
+ pqi_ctrl_in_ofa(ctrl_info) || pqi_ctrl_in_shutdown(ctrl_info)) {
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
@@@ -5373,7 -5419,7 +5373,7 @@@
if (pqi_is_logical_device(device)) {
raid_bypassed = false;
if (device->raid_bypass_enabled &&
- !blk_rq_is_passthrough(scmd->request)) {
+ !blk_rq_is_passthrough(scmd->request)) {
rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device,
scmd, queue_group);
if (rc == 0 || rc == SCSI_MLQUEUE_HOST_BUSY)
@@@ -5604,18 -5650,6 +5604,18 @@@ static int pqi_ctrl_wait_for_pending_io
return 0;
}
+static int pqi_ctrl_wait_for_pending_sync_cmds(struct pqi_ctrl_info *ctrl_info)
+{
+ while (atomic_read(&ctrl_info->sync_cmds_outstanding)) {
+ pqi_check_ctrl_health(ctrl_info);
+ if (pqi_ctrl_offline(ctrl_info))
+ return -ENXIO;
+ usleep_range(1000, 2000);
+ }
+
+ return 0;
+}
+
static void pqi_lun_reset_complete(struct pqi_io_request *io_request,
void *context)
{
@@@ -5624,8 -5658,7 +5624,8 @@@
complete(waiting);
}
-#define PQI_LUN_RESET_TIMEOUT_SECS 10
+#define PQI_LUN_RESET_TIMEOUT_SECS 30
+#define PQI_LUN_RESET_POLL_COMPLETION_SECS 10
static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device, struct completion *wait)
@@@ -5634,7 -5667,7 +5634,7 @@@
while (1) {
if (wait_for_completion_io_timeout(wait,
- PQI_LUN_RESET_TIMEOUT_SECS * PQI_HZ)) {
+ PQI_LUN_RESET_POLL_COMPLETION_SECS * PQI_HZ)) {
rc = 0;
break;
}
@@@ -5671,9 -5704,6 +5671,9 @@@ static int pqi_lun_reset(struct pqi_ctr
memcpy(request->lun_number, device->scsi3addr,
sizeof(request->lun_number));
request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
+ if (ctrl_info->tmf_iu_timeout_supported)
+ put_unaligned_le16(PQI_LUN_RESET_TIMEOUT_SECS,
+ &request->timeout);
pqi_start_io(ctrl_info,
&ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
@@@ -5703,7 -5733,7 +5703,7 @@@ static int _pqi_device_reset(struct pqi
for (retries = 0;;) {
rc = pqi_lun_reset(ctrl_info, device);
- if (rc != -EAGAIN || ++retries > PQI_LUN_RESET_RETRIES)
+ if (rc == 0 || ++retries > PQI_LUN_RESET_RETRIES)
break;
msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
}
@@@ -5757,17 -5787,17 +5757,17 @@@ static int pqi_eh_device_reset_handler(
shost->host_no, device->bus, device->target, device->lun);
pqi_check_ctrl_health(ctrl_info);
- if (pqi_ctrl_offline(ctrl_info)) {
- dev_err(&ctrl_info->pci_dev->dev,
- "controller %u offlined - cannot send device reset\n",
- ctrl_info->ctrl_id);
+ if (pqi_ctrl_offline(ctrl_info) ||
+ pqi_device_reset_blocked(ctrl_info)) {
rc = FAILED;
goto out;
}
pqi_wait_until_ofa_finished(ctrl_info);
+ atomic_inc(&ctrl_info->sync_cmds_outstanding);
rc = pqi_device_reset(ctrl_info, device);
+ atomic_dec(&ctrl_info->sync_cmds_outstanding);
out:
dev_err(&ctrl_info->pci_dev->dev,
@@@ -6036,9 -6066,6 +6036,9 @@@ static int pqi_passthru_ioctl(struct pq
put_unaligned_le16(iu_length, &request.header.iu_length);
+ if (ctrl_info->raid_iu_timeout_supported)
+ put_unaligned_le32(iocommand.Request.Timeout, &request.timeout);
+
rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
PQI_SYNC_FLAGS_INTERRUPTABLE, &pqi_error_info, NO_TIMEOUT);
@@@ -6092,7 -6119,7 +6092,7 @@@ static int pqi_ioctl(struct scsi_devic
ctrl_info = shost_to_hba(sdev->host);
- if (pqi_ctrl_in_ofa(ctrl_info))
+ if (pqi_ctrl_in_ofa(ctrl_info) || pqi_ctrl_in_shutdown(ctrl_info))
return -EBUSY;
switch (cmd) {
@@@ -6133,8 -6160,14 +6133,8 @@@ static ssize_t pqi_firmware_version_sho
static ssize_t pqi_driver_version_show(struct device *dev,
struct device_attribute *attr, char *buffer)
{
- struct Scsi_Host *shost;
- struct pqi_ctrl_info *ctrl_info;
-
- shost = class_to_shost(dev);
- ctrl_info = shost_to_hba(shost);
-
- return snprintf(buffer, PAGE_SIZE,
- "%s\n", DRIVER_VERSION BUILD_TIMESTAMP);
+ return snprintf(buffer, PAGE_SIZE, "%s\n",
+ DRIVER_VERSION BUILD_TIMESTAMP);
}
static ssize_t pqi_serial_number_show(struct device *dev,
@@@ -6250,7 -6283,7 +6250,7 @@@ static ssize_t pqi_unique_id_show(struc
struct scsi_device *sdev;
struct pqi_scsi_dev *device;
unsigned long flags;
- unsigned char uid[16];
+ u8 unique_id[16];
sdev = to_scsi_device(dev);
ctrl_info = shost_to_hba(sdev->host);
@@@ -6263,22 -6296,16 +6263,22 @@@
flags);
return -ENODEV;
}
- memcpy(uid, device->unique_id, sizeof(uid));
+
+ if (device->is_physical_device) {
+ memset(unique_id, 0, 8);
+ memcpy(unique_id + 8, &device->wwid, sizeof(device->wwid));
+ } else {
+ memcpy(unique_id, device->volume_id, sizeof(device->volume_id));
+ }
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
return snprintf(buffer, PAGE_SIZE,
"%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X\n",
- uid[0], uid[1], uid[2], uid[3],
- uid[4], uid[5], uid[6], uid[7],
- uid[8], uid[9], uid[10], uid[11],
- uid[12], uid[13], uid[14], uid[15]);
+ unique_id[0], unique_id[1], unique_id[2], unique_id[3],
+ unique_id[4], unique_id[5], unique_id[6], unique_id[7],
+ unique_id[8], unique_id[9], unique_id[10], unique_id[11],
+ unique_id[12], unique_id[13], unique_id[14], unique_id[15]);
}
static ssize_t pqi_lunid_show(struct device *dev,
@@@ -6301,7 -6328,6 +6301,7 @@@
flags);
return -ENODEV;
}
+
memcpy(lunid, device->scsi3addr, sizeof(lunid));
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@@ -6309,8 -6335,7 +6309,8 @@@
return snprintf(buffer, PAGE_SIZE, "0x%8phN\n", lunid);
}
-#define MAX_PATHS 8
+#define MAX_PATHS 8
+
static ssize_t pqi_path_info_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@@ -6322,9 -6347,9 +6322,9 @@@
int output_len = 0;
u8 box;
u8 bay;
- u8 path_map_index = 0;
+ u8 path_map_index;
char *active;
- unsigned char phys_connector[2];
+ u8 phys_connector[2];
sdev = to_scsi_device(dev);
ctrl_info = shost_to_hba(sdev->host);
@@@ -6340,7 -6365,7 +6340,7 @@@
bay = device->bay;
for (i = 0; i < MAX_PATHS; i++) {
- path_map_index = 1<<i;
+ path_map_index = 1 << i;
if (i == device->active_path_index)
active = "Active";
else if (device->path_map & path_map_index)
@@@ -6391,10 -6416,10 +6391,10 @@@ end_buffer
}
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+
return output_len;
}
-
static ssize_t pqi_sas_address_show(struct device *dev,
struct device_attribute *attr, char *buffer)
{
@@@ -6415,7 -6440,6 +6415,7 @@@
flags);
return -ENODEV;
}
+
sas_address = device->sas_address;
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@@ -6820,27 -6844,6 +6820,27 @@@ static void pqi_firmware_feature_status
firmware_feature->feature_name);
}
+static void pqi_ctrl_update_feature_flags(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_firmware_feature *firmware_feature)
+{
+ switch (firmware_feature->feature_bit) {
+ case PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE:
+ ctrl_info->soft_reset_handshake_supported =
+ firmware_feature->enabled;
+ break;
+ case PQI_FIRMWARE_FEATURE_RAID_IU_TIMEOUT:
+ ctrl_info->raid_iu_timeout_supported =
+ firmware_feature->enabled;
+ break;
+ case PQI_FIRMWARE_FEATURE_TMF_IU_TIMEOUT:
+ ctrl_info->tmf_iu_timeout_supported =
+ firmware_feature->enabled;
+ break;
+ }
+
+ pqi_firmware_feature_status(ctrl_info, firmware_feature);
+}
+
static inline void pqi_firmware_feature_update(struct pqi_ctrl_info *ctrl_info,
struct pqi_firmware_feature *firmware_feature)
{
@@@ -6864,17 -6867,7 +6864,17 @@@ static struct pqi_firmware_feature pqi_
{
.feature_name = "New Soft Reset Handshake",
.feature_bit = PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE,
- .feature_status = pqi_firmware_feature_status,
+ .feature_status = pqi_ctrl_update_feature_flags,
+ },
+ {
+ .feature_name = "RAID IU Timeout",
+ .feature_bit = PQI_FIRMWARE_FEATURE_RAID_IU_TIMEOUT,
+ .feature_status = pqi_ctrl_update_feature_flags,
+ },
+ {
+ .feature_name = "TMF IU Timeout",
+ .feature_bit = PQI_FIRMWARE_FEATURE_TMF_IU_TIMEOUT,
+ .feature_status = pqi_ctrl_update_feature_flags,
},
};
@@@ -6928,6 -6921,7 +6928,6 @@@ static void pqi_process_firmware_featur
return;
}
- ctrl_info->soft_reset_handshake_supported = false;
for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) {
if (!pqi_firmware_features[i].supported)
continue;
@@@ -6935,6 -6929,10 +6935,6 @@@
firmware_features_iomem_addr,
pqi_firmware_features[i].feature_bit)) {
pqi_firmware_features[i].enabled = true;
- if (pqi_firmware_features[i].feature_bit ==
- PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE)
- ctrl_info->soft_reset_handshake_supported =
- true;
}
pqi_firmware_feature_update(ctrl_info,
&pqi_firmware_features[i]);
@@@ -7076,20 -7074,13 +7076,20 @@@ static int pqi_force_sis_mode(struct pq
return pqi_revert_to_sis_mode(ctrl_info);
}
+#define PQI_POST_RESET_DELAY_B4_MSGU_READY 5000
+
static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
{
int rc;
- rc = pqi_force_sis_mode(ctrl_info);
- if (rc)
- return rc;
+ if (reset_devices) {
+ sis_soft_reset(ctrl_info);
+ msleep(PQI_POST_RESET_DELAY_B4_MSGU_READY);
+ } else {
+ rc = pqi_force_sis_mode(ctrl_info);
+ if (rc)
+ return rc;
+ }
/*
* Wait until the controller is ready to start accepting SIS
@@@ -7395,7 -7386,7 +7395,7 @@@ static int pqi_ctrl_init_resume(struct
rc = pqi_get_ctrl_product_details(ctrl_info);
if (rc) {
dev_err(&ctrl_info->pci_dev->dev,
- "error obtaining product detail\n");
+ "error obtaining product details\n");
return rc;
}
@@@ -7523,7 -7514,6 +7523,7 @@@ static struct pqi_ctrl_info *pqi_alloc_
INIT_WORK(&ctrl_info->event_work, pqi_event_worker);
atomic_set(&ctrl_info->num_interrupts, 0);
+ atomic_set(&ctrl_info->sync_cmds_outstanding, 0);
INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);
@@@ -7731,8 -7721,6 +7731,8 @@@ static void pqi_ofa_setup_host_buffer(s
dev_err(dev, "Failed to allocate host buffer of size = %u",
bytes_requested);
}
+
+ return;
}
static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info)
@@@ -7799,6 -7787,8 +7799,6 @@@ static int pqi_ofa_host_memory_update(s
0, NULL, NO_TIMEOUT);
}
-#define PQI_POST_RESET_DELAY_B4_MSGU_READY 5000
-
static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info)
{
msleep(PQI_POST_RESET_DELAY_B4_MSGU_READY);
@@@ -7966,73 -7956,28 +7966,73 @@@ static void pqi_pci_remove(struct pci_d
pqi_remove_ctrl(ctrl_info);
}
+static void pqi_crash_if_pending_command(struct pqi_ctrl_info *ctrl_info)
+{
+ unsigned int i;
+ struct pqi_io_request *io_request;
+ struct scsi_cmnd *scmd;
+
+ for (i = 0; i < ctrl_info->max_io_slots; i++) {
+ io_request = &ctrl_info->io_request_pool[i];
+ if (atomic_read(&io_request->refcount) == 0)
+ continue;
+ scmd = io_request->scmd;
+ WARN_ON(scmd != NULL); /* IO command from SML */
+ WARN_ON(scmd == NULL); /* Non-IO cmd or driver initiated*/
+ }
+}
+
static void pqi_shutdown(struct pci_dev *pci_dev)
{
int rc;
struct pqi_ctrl_info *ctrl_info;
ctrl_info = pci_get_drvdata(pci_dev);
- if (!ctrl_info)
- goto error;
+ if (!ctrl_info) {
+ dev_err(&pci_dev->dev,
+ "cache could not be flushed\n");
+ return;
+ }
+
+ pqi_disable_events(ctrl_info);
+ pqi_wait_until_ofa_finished(ctrl_info);
+ pqi_cancel_update_time_worker(ctrl_info);
+ pqi_cancel_rescan_worker(ctrl_info);
+ pqi_cancel_event_worker(ctrl_info);
+
+ pqi_ctrl_shutdown_start(ctrl_info);
+ pqi_ctrl_wait_until_quiesced(ctrl_info);
+
+ rc = pqi_ctrl_wait_for_pending_io(ctrl_info, NO_TIMEOUT);
+ if (rc) {
+ dev_err(&pci_dev->dev,
+ "wait for pending I/O failed\n");
+ return;
+ }
+
+ pqi_ctrl_block_device_reset(ctrl_info);
+ pqi_wait_until_lun_reset_finished(ctrl_info);
/*
* Write all data in the controller's battery-backed cache to
* storage.
*/
rc = pqi_flush_cache(ctrl_info, SHUTDOWN);
- pqi_free_interrupts(ctrl_info);
- pqi_reset(ctrl_info);
- if (rc == 0)
+ if (rc)
+ dev_err(&pci_dev->dev,
+ "unable to flush controller cache\n");
+
+ pqi_ctrl_block_requests(ctrl_info);
+
+ rc = pqi_ctrl_wait_for_pending_sync_cmds(ctrl_info);
+ if (rc) {
+ dev_err(&pci_dev->dev,
+ "wait for pending sync cmds failed\n");
return;
+ }
-error:
- dev_warn(&pci_dev->dev,
- "unable to flush controller cache\n");
+ pqi_crash_if_pending_command(ctrl_info);
+ pqi_reset(ctrl_info);
}
static void pqi_process_lockup_action_param(void)
@@@ -8689,11 -8634,11 +8689,11 @@@ static void __attribute__((unused)) ver
BUILD_BUG_ON(offsetof(struct pqi_general_admin_request,
data.delete_operational_queue.queue_id) != 12);
BUILD_BUG_ON(sizeof(struct pqi_general_admin_request) != 64);
- BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request,
+ BUILD_BUG_ON(sizeof_member(struct pqi_general_admin_request,
data.create_operational_iq) != 64 - 11);
- BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request,
+ BUILD_BUG_ON(sizeof_member(struct pqi_general_admin_request,
data.create_operational_oq) != 64 - 11);
- BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request,
+ BUILD_BUG_ON(sizeof_member(struct pqi_general_admin_request,
data.delete_operational_queue) != 64 - 11);
BUILD_BUG_ON(offsetof(struct pqi_general_admin_response,
@@@ -8740,8 -8685,6 +8740,8 @@@
error_index) != 27);
BUILD_BUG_ON(offsetof(struct pqi_raid_path_request,
cdb) != 32);
+ BUILD_BUG_ON(offsetof(struct pqi_raid_path_request,
+ timeout) != 60);
BUILD_BUG_ON(offsetof(struct pqi_raid_path_request,
sg_descriptors) != 64);
BUILD_BUG_ON(sizeof(struct pqi_raid_path_request) !=
@@@ -8897,8 -8840,6 +8897,8 @@@
BUILD_BUG_ON(offsetof(struct pqi_task_management_request,
nexus_id) != 10);
BUILD_BUG_ON(offsetof(struct pqi_task_management_request,
+ timeout) != 14);
+ BUILD_BUG_ON(offsetof(struct pqi_task_management_request,
lun_number) != 16);
BUILD_BUG_ON(offsetof(struct pqi_task_management_request,
protocol_specific) != 24);
diff --combined drivers/usb/gadget/function/f_fs.c
index ce1d0235969c,2539ea40e94c..b0f3aa67a040
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@@ -1352,6 -1352,14 +1352,6 @@@ static long ffs_epfile_ioctl(struct fil
return ret;
}
-#ifdef CONFIG_COMPAT
-static long ffs_epfile_compat_ioctl(struct file *file, unsigned code,
- unsigned long value)
-{
- return ffs_epfile_ioctl(file, code, value);
-}
-#endif
-
static const struct file_operations ffs_epfile_operations = {
.llseek = no_llseek,
@@@ -1360,7 -1368,9 +1360,7 @@@
.read_iter = ffs_epfile_read_iter,
.release = ffs_epfile_release,
.unlocked_ioctl = ffs_epfile_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ffs_epfile_compat_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
@@@ -3509,7 -3519,7 +3509,7 @@@ static void ffs_free_inst(struct usb_fu
static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name)
{
- if (strlen(name) >= FIELD_SIZEOF(struct ffs_dev, name))
+ if (strlen(name) >= sizeof_member(struct ffs_dev, name))
return -ENAMETOOLONG;
return ffs_name_dev(to_f_fs_opts(fi)->dev, name);
}
diff --combined fs/crypto/keyring.c
index 040df1f5e1c8,58e8b4b167d6..7853f202a17d
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@@ -43,10 -43,8 +43,10 @@@ static void free_master_key(struct fscr
wipe_master_key_secret(&mk->mk_secret);
- for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++)
- crypto_free_skcipher(mk->mk_mode_keys[i]);
+ for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) {
+ crypto_free_skcipher(mk->mk_direct_tfms[i]);
+ crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]);
+ }
key_put(mk->mk_users);
kzfree(mk);
@@@ -151,7 -149,7 +151,7 @@@ static struct key *search_fscrypt_keyri
}
#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \
- (CONST_STRLEN("fscrypt-") + FIELD_SIZEOF(struct super_block, s_id))
+ (CONST_STRLEN("fscrypt-") + sizeof_member(struct super_block, s_id))
#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
diff --combined fs/ext2/super.c
index bcffe25da2f0,2435eda5e2d6..1e036daab271
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@@ -219,7 -219,7 +219,7 @@@ static int __init init_inodecache(void
(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
SLAB_ACCOUNT),
offsetof(struct ext2_inode_info, i_data),
- sizeof_field(struct ext2_inode_info, i_data),
+ sizeof_member(struct ext2_inode_info, i_data),
init_once);
if (ext2_inode_cachep == NULL)
return -ENOMEM;
@@@ -702,7 -702,13 +702,7 @@@ static int ext2_check_descriptors(struc
for (i = 0; i < sbi->s_groups_count; i++) {
struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL);
ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i);
- ext2_fsblk_t last_block;
-
- if (i == sbi->s_groups_count - 1)
- last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
- else
- last_block = first_block +
- (EXT2_BLOCKS_PER_GROUP(sb) - 1);
+ ext2_fsblk_t last_block = ext2_group_last_block_no(sb, i);
if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
le32_to_cpu(gdp->bg_block_bitmap) > last_block)
@@@ -800,6 -806,7 +800,6 @@@ static unsigned long descriptor_loc(str
{
struct ext2_sb_info *sbi = EXT2_SB(sb);
unsigned long bg, first_meta_bg;
- int has_super = 0;
first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
@@@ -807,8 -814,10 +807,8 @@@
nr < first_meta_bg)
return (logic_sb_block + nr + 1);
bg = sbi->s_desc_per_block * nr;
- if (ext2_bg_has_super(sb, bg))
- has_super = 1;
- return ext2_group_first_block_no(sb, bg) + has_super;
+ return ext2_group_first_block_no(sb, bg) + ext2_bg_has_super(sb, bg);
}
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
diff --combined fs/ext4/super.c
index 1d82b56d9b11,2318e5fe3fd4..1ef37008ea9c
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@@ -1151,7 -1151,7 +1151,7 @@@ static int __init init_inodecache(void
(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
SLAB_ACCOUNT),
offsetof(struct ext4_inode_info, i_data),
- sizeof_field(struct ext4_inode_info, i_data),
+ sizeof_member(struct ext4_inode_info, i_data),
init_once);
if (ext4_inode_cachep == NULL)
return -ENOMEM;
@@@ -1172,9 -1172,9 +1172,9 @@@ void ext4_clear_inode(struct inode *ino
{
invalidate_inode_buffers(inode);
clear_inode(inode);
- dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
+ dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
@@@ -1345,18 -1345,6 +1345,18 @@@ static bool ext4_dummy_context(struct i
return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
}
+static bool ext4_has_stable_inodes(struct super_block *sb)
+{
+ return ext4_has_feature_stable_inodes(sb);
+}
+
+static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
+ int *ino_bits_ret, int *lblk_bits_ret)
+{
+ *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
+ *lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
+}
+
static const struct fscrypt_operations ext4_cryptops = {
.key_prefix = "ext4:",
.get_context = ext4_get_context,
@@@ -1364,8 -1352,6 +1364,8 @@@
.dummy_context = ext4_dummy_context,
.empty_dir = ext4_empty_dir,
.max_namelen = EXT4_NAME_LEN,
+ .has_stable_inodes = ext4_has_stable_inodes,
+ .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
};
#endif
@@@ -1388,6 -1374,7 +1388,6 @@@ static ssize_t ext4_quota_write(struct
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
static struct dquot **ext4_get_dquots(struct inode *inode)
{
@@@ -1405,7 -1392,7 +1405,7 @@@ static const struct dquot_operations ex
.destroy_dquot = dquot_destroy,
.get_projid = ext4_get_projid,
.get_inode_usage = ext4_get_inode_usage,
- .get_next_id = ext4_get_next_id,
+ .get_next_id = dquot_get_next_id,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@@ -2064,7 -2051,7 +2064,7 @@@ static int parse_options(char *options
unsigned int *journal_ioprio,
int is_remount)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
substring_t args[MAX_OPT_ARGS];
int token;
@@@ -2118,6 -2105,16 +2118,6 @@@
}
}
#endif
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- int blocksize =
- BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
-
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
- return 0;
- }
- }
return 1;
}
@@@ -3558,15 -3555,12 +3558,15 @@@ static void ext4_clamp_want_extra_isize
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ unsigned def_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
- sbi->s_want_extra_isize == 0) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
+ if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = 0;
+ return;
+ }
+ if (sbi->s_want_extra_isize < 4) {
+ sbi->s_want_extra_isize = def_extra_isize;
if (ext4_has_feature_extra_isize(sb)) {
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_want_extra_isize))
@@@ -3579,10 -3573,10 +3579,10 @@@
}
}
/* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
+ if ((sbi->s_want_extra_isize > sbi->s_inode_size) ||
+ (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+ sbi->s_inode_size)) {
+ sbi->s_want_extra_isize = def_extra_isize;
ext4_msg(sb, KERN_INFO,
"required extra inode space not available");
}
@@@ -4445,6 -4439,13 +4445,6 @@@ no_journal
}
}
- if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
- (blocksize != PAGE_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported blocksize for fs encryption");
- goto failed_mount_wq;
- }
-
if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
goto failed_mount_wq;
@@@ -5834,7 -5835,7 +5834,7 @@@ static int ext4_quota_enable(struct sup
/* Don't account quota for quota files to avoid recursion */
qf_inode->i_flags |= S_NOQUOTA;
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
- err = dquot_enable(qf_inode, type, format_id, flags);
+ err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
if (err)
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
iput(qf_inode);
@@@ -6018,6 -6019,18 +6018,6 @@@ out
}
return len;
}
-
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
-{
- const struct quota_format_ops *ops;
-
- if (!sb_has_quota_loaded(sb, qid->type))
- return -ESRCH;
- ops = sb_dqopt(sb)->ops[qid->type];
- if (!ops || !ops->get_next_id)
- return -ENOSYS;
- return dquot_get_next_id(sb, qid);
-}
#endif
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
diff --combined fs/fuse/virtio_fs.c
index bade74768903,13f67ae37257..be36b69de9ca
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@@ -30,12 -30,10 +30,12 @@@ struct virtio_fs_vq
struct virtqueue *vq; /* protected by ->lock */
struct work_struct done_work;
struct list_head queued_reqs;
+ struct list_head end_reqs; /* End these requests */
struct delayed_work dispatch_work;
struct fuse_dev *fud;
bool connected;
long in_flight;
+ struct completion in_flight_zero; /* No inflight requests */
char name[24];
} ____cacheline_aligned_in_smp;
@@@ -49,20 -47,13 +49,20 @@@ struct virtio_fs
unsigned int num_request_queues; /* number of request queues */
};
-struct virtio_fs_forget {
+struct virtio_fs_forget_req {
struct fuse_in_header ih;
struct fuse_forget_in arg;
+};
+
+struct virtio_fs_forget {
/* This request can be temporarily queued on virt queue */
struct list_head list;
+ struct virtio_fs_forget_req req;
};
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+ struct fuse_req *req, bool in_flight);
+
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
struct virtio_fs *fs = vq->vdev->priv;
@@@ -75,21 -66,6 +75,21 @@@ static inline struct fuse_pqueue *vq_to
return &vq_to_fsvq(vq)->fud->pq;
}
+/* Should be called with fsvq->lock held. */
+static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+ fsvq->in_flight++;
+}
+
+/* Should be called with fsvq->lock held. */
+static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+ WARN_ON(fsvq->in_flight <= 0);
+ fsvq->in_flight--;
+ if (!fsvq->in_flight)
+ complete(&fsvq->in_flight_zero);
+}
+
static void release_virtio_fs_obj(struct kref *ref)
{
struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
@@@ -118,46 -94,51 +118,46 @@@ static void virtio_fs_drain_queue(struc
WARN_ON(fsvq->in_flight < 0);
/* Wait for in flight requests to finish.*/
- while (1) {
- spin_lock(&fsvq->lock);
- if (!fsvq->in_flight) {
- spin_unlock(&fsvq->lock);
- break;
- }
+ spin_lock(&fsvq->lock);
+ if (fsvq->in_flight) {
+ /* We are holding virtio_fs_mutex. There should not be any
+ * waiters waiting for completion.
+ */
+ reinit_completion(&fsvq->in_flight_zero);
+ spin_unlock(&fsvq->lock);
+ wait_for_completion(&fsvq->in_flight_zero);
+ } else {
spin_unlock(&fsvq->lock);
- /* TODO use completion instead of timeout */
- usleep_range(1000, 2000);
}
flush_work(&fsvq->done_work);
flush_delayed_work(&fsvq->dispatch_work);
}
-static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
-{
- struct virtio_fs_forget *forget;
-
- spin_lock(&fsvq->lock);
- while (1) {
- forget = list_first_entry_or_null(&fsvq->queued_reqs,
- struct virtio_fs_forget, list);
- if (!forget)
- break;
- list_del(&forget->list);
- kfree(forget);
- }
- spin_unlock(&fsvq->lock);
-}
-
-static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
+static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
struct virtio_fs_vq *fsvq;
int i;
for (i = 0; i < fs->nvqs; i++) {
fsvq = &fs->vqs[i];
- if (i == VQ_HIPRIO)
- drain_hiprio_queued_reqs(fsvq);
-
virtio_fs_drain_queue(fsvq);
}
}
+static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
+{
+ /* Provides mutual exclusion between ->remove and ->kill_sb
+ * paths. We don't want both of these draining queue at the
+ * same time. Current completion logic reinits completion
+ * and that means there should not be any other thread
+ * doing reinit or waiting for completion already.
+ */
+ mutex_lock(&virtio_fs_mutex);
+ virtio_fs_drain_all_queues_locked(fs);
+ mutex_unlock(&virtio_fs_mutex);
+}
+
static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
struct virtio_fs_vq *fsvq;
@@@ -234,7 -215,7 +234,7 @@@ static void virtio_fs_free_devs(struct
/* Read filesystem name from virtio config into fs->tag (must kfree()). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
- char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
+ char tag_buf[sizeof_member(struct virtio_fs_config, tag)];
char *end;
size_t len;
@@@ -272,148 -253,74 +272,148 @@@ static void virtio_fs_hiprio_done_work(
while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
kfree(req);
- fsvq->in_flight--;
+ dec_in_flight_req(fsvq);
}
} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
spin_unlock(&fsvq->lock);
}
-static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
+static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
-}
-
-static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
-{
- struct virtio_fs_forget *forget;
+ struct fuse_req *req;
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
dispatch_work.work);
- struct virtqueue *vq = fsvq->vq;
- struct scatterlist sg;
- struct scatterlist *sgs[] = {&sg};
- bool notify;
+ struct fuse_conn *fc = fsvq->fud->fc;
int ret;
pr_debug("virtio-fs: worker %s called.\n", __func__);
while (1) {
spin_lock(&fsvq->lock);
- forget = list_first_entry_or_null(&fsvq->queued_reqs,
- struct virtio_fs_forget, list);
- if (!forget) {
+ req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
+ list);
+ if (!req) {
spin_unlock(&fsvq->lock);
- return;
+ break;
}
- list_del(&forget->list);
- if (!fsvq->connected) {
+ list_del_init(&req->list);
+ spin_unlock(&fsvq->lock);
+ fuse_request_end(fc, req);
+ }
+
+ /* Dispatch pending requests */
+ while (1) {
+ spin_lock(&fsvq->lock);
+ req = list_first_entry_or_null(&fsvq->queued_reqs,
+ struct fuse_req, list);
+ if (!req) {
spin_unlock(&fsvq->lock);
- kfree(forget);
- continue;
+ return;
}
+ list_del_init(&req->list);
+ spin_unlock(&fsvq->lock);
- sg_init_one(&sg, forget, sizeof(*forget));
-
- /* Enqueue the request */
- dev_dbg(&vq->vdev->dev, "%s\n", __func__);
- ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+ ret = virtio_fs_enqueue_req(fsvq, req, true);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
- pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
- ret);
- list_add_tail(&forget->list,
- &fsvq->queued_reqs);
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->queued_reqs);
schedule_delayed_work(&fsvq->dispatch_work,
- msecs_to_jiffies(1));
- } else {
- pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
- ret);
- kfree(forget);
+ msecs_to_jiffies(1));
+ spin_unlock(&fsvq->lock);
+ return;
}
+ req->out.h.error = ret;
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ spin_unlock(&fsvq->lock);
+ pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
+ ret);
+ fuse_request_end(fc, req);
+ }
+ }
+}
+
+/*
+ * Returns 1 if queue is full and sender should wait a bit before sending
+ * next request, 0 otherwise.
+ */
+static int send_forget_request(struct virtio_fs_vq *fsvq,
+ struct virtio_fs_forget *forget,
+ bool in_flight)
+{
+ struct scatterlist sg;
+ struct virtqueue *vq;
+ int ret = 0;
+ bool notify;
+ struct virtio_fs_forget_req *req = &forget->req;
+
+ spin_lock(&fsvq->lock);
+ if (!fsvq->connected) {
+ if (in_flight)
+ dec_in_flight_req(fsvq);
+ kfree(forget);
+ goto out;
+ }
+
+ sg_init_one(&sg, req, sizeof(*req));
+ vq = fsvq->vq;
+ dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+
+ ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
+ if (ret < 0) {
+ if (ret == -ENOMEM || ret == -ENOSPC) {
+ pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
+ ret);
+ list_add_tail(&forget->list, &fsvq->queued_reqs);
+ schedule_delayed_work(&fsvq->dispatch_work,
+ msecs_to_jiffies(1));
+ if (!in_flight)
+ inc_in_flight_req(fsvq);
+ /* Queue is full */
+ ret = 1;
+ } else {
+ pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+ ret);
+ kfree(forget);
+ if (in_flight)
+ dec_in_flight_req(fsvq);
+ }
+ goto out;
+ }
+
+ if (!in_flight)
+ inc_in_flight_req(fsvq);
+ notify = virtqueue_kick_prepare(vq);
+ spin_unlock(&fsvq->lock);
+
+ if (notify)
+ virtqueue_notify(vq);
+ return ret;
+out:
+ spin_unlock(&fsvq->lock);
+ return ret;
+}
+
+static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
+{
+ struct virtio_fs_forget *forget;
+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+ dispatch_work.work);
+ pr_debug("virtio-fs: worker %s called.\n", __func__);
+ while (1) {
+ spin_lock(&fsvq->lock);
+ forget = list_first_entry_or_null(&fsvq->queued_reqs,
+ struct virtio_fs_forget, list);
+ if (!forget) {
spin_unlock(&fsvq->lock);
return;
}
- fsvq->in_flight++;
- notify = virtqueue_kick_prepare(vq);
+ list_del(&forget->list);
spin_unlock(&fsvq->lock);
-
- if (notify)
- virtqueue_notify(vq);
- pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
- __func__);
+ if (send_forget_request(fsvq, forget, true))
+ return;
}
}
@@@ -545,7 -452,7 +545,7 @@@ static void virtio_fs_requests_done_wor
fuse_request_end(fc, req);
spin_lock(&fsvq->lock);
- fsvq->in_flight--;
+ dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
}
}
@@@ -595,10 -502,8 +595,10 @@@ static int virtio_fs_setup_vqs(struct v
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+ INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
virtio_fs_hiprio_dispatch_work);
+ init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero);
spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
/* Initialize the requests virtqueues */
@@@ -606,10 -511,8 +606,10 @@@
spin_lock_init(&fs->vqs[i].lock);
INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
- virtio_fs_dummy_dispatch_work);
+ virtio_fs_request_dispatch_work);
INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+ INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
+ init_completion(&fs->vqs[i].in_flight_zero);
snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
"requests.%u", i - VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done;
@@@ -703,7 -606,7 +703,7 @@@ static void virtio_fs_remove(struct vir
/* This device is going away. No one should get new reference */
list_del_init(&fs->list);
virtio_fs_stop_all_queues(fs);
- virtio_fs_drain_all_queues(fs);
+ virtio_fs_drain_all_queues_locked(fs);
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
@@@ -728,12 -631,12 +728,12 @@@ static int virtio_fs_restore(struct vir
}
#endif /* CONFIG_PM_SLEEP */
-const static struct virtio_device_id id_table[] = {
+static const struct virtio_device_id id_table[] = {
{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
{},
};
-const static unsigned int feature_table[] = {};
+static const unsigned int feature_table[] = {};
static struct virtio_driver virtio_fs_driver = {
.driver.name = KBUILD_MODNAME,
@@@ -754,10 -657,14 +754,10 @@@ __releases(fiq->lock
{
struct fuse_forget_link *link;
struct virtio_fs_forget *forget;
- struct scatterlist sg;
- struct scatterlist *sgs[] = {&sg};
+ struct virtio_fs_forget_req *req;
struct virtio_fs *fs;
- struct virtqueue *vq;
struct virtio_fs_vq *fsvq;
- bool notify;
u64 unique;
- int ret;
link = fuse_dequeue_forget(fiq, 1, NULL);
unique = fuse_get_unique(fiq);
@@@ -768,19 -675,56 +768,19 @@@
/* Allocate a buffer for the request */
forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
+ req = &forget->req;
- forget->ih = (struct fuse_in_header){
+ req->ih = (struct fuse_in_header){
.opcode = FUSE_FORGET,
.nodeid = link->forget_one.nodeid,
.unique = unique,
- .len = sizeof(*forget),
+ .len = sizeof(*req),
};
- forget->arg = (struct fuse_forget_in){
+ req->arg = (struct fuse_forget_in){
.nlookup = link->forget_one.nlookup,
};
- sg_init_one(&sg, forget, sizeof(*forget));
-
- /* Enqueue the request */
- spin_lock(&fsvq->lock);
-
- if (!fsvq->connected) {
- kfree(forget);
- spin_unlock(&fsvq->lock);
- goto out;
- }
-
- vq = fsvq->vq;
- dev_dbg(&vq->vdev->dev, "%s\n", __func__);
-
- ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
- if (ret < 0) {
- if (ret == -ENOMEM || ret == -ENOSPC) {
- pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
- ret);
- list_add_tail(&forget->list, &fsvq->queued_reqs);
- schedule_delayed_work(&fsvq->dispatch_work,
- msecs_to_jiffies(1));
- } else {
- pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
- ret);
- kfree(forget);
- }
- spin_unlock(&fsvq->lock);
- goto out;
- }
-
- fsvq->in_flight++;
- notify = virtqueue_kick_prepare(vq);
-
- spin_unlock(&fsvq->lock);
-
- if (notify)
- virtqueue_notify(vq);
-out:
+ send_forget_request(fsvq, forget, false);
kfree(link);
}
@@@ -875,7 -819,7 +875,7 @@@ static unsigned int sg_init_fuse_args(s
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req)
+ struct fuse_req *req, bool in_flight)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@@@ -891,7 -835,6 +891,7 @@@
unsigned int i;
int ret;
bool notify;
+ struct fuse_pqueue *fpq;
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
@@@ -946,17 -889,7 +946,17 @@@
goto out;
}
- fsvq->in_flight++;
+ /* Request successfully sent. */
+ fpq = &fsvq->fud->pq;
+ spin_lock(&fpq->lock);
+ list_add_tail(&req->list, fpq->processing);
+ spin_unlock(&fpq->lock);
+ set_bit(FR_SENT, &req->flags);
+ /* matches barrier in request_wait_answer() */
+ smp_mb__after_atomic();
+
+ if (!in_flight)
+ inc_in_flight_req(fsvq);
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@@ -982,8 -915,9 +982,8 @@@ __releases(fiq->lock
{
unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
struct virtio_fs *fs;
- struct fuse_conn *fc;
struct fuse_req *req;
- struct fuse_pqueue *fpq;
+ struct virtio_fs_vq *fsvq;
int ret;
WARN_ON(list_empty(&fiq->pending));
@@@ -994,41 -928,49 +994,41 @@@
spin_unlock(&fiq->lock);
fs = fiq->priv;
- fc = fs->vqs[queue_id].fud->fc;
pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
__func__, req->in.h.opcode, req->in.h.unique,
req->in.h.nodeid, req->in.h.len,
fuse_len_args(req->args->out_numargs, req->args->out_args));
- fpq = &fs->vqs[queue_id].fud->pq;
- spin_lock(&fpq->lock);
- if (!fpq->connected) {
- spin_unlock(&fpq->lock);
- req->out.h.error = -ENODEV;
- pr_err("virtio-fs: %s disconnected\n", __func__);
- fuse_request_end(fc, req);
- return;
- }
- list_add_tail(&req->list, fpq->processing);
- spin_unlock(&fpq->lock);
- set_bit(FR_SENT, &req->flags);
- /* matches barrier in request_wait_answer() */
- smp_mb__after_atomic();
-
-retry:
- ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
+ fsvq = &fs->vqs[queue_id];
+ ret = virtio_fs_enqueue_req(fsvq, req, false);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
- /* Virtqueue full. Retry submission */
- /* TODO use completion instead of timeout */
- usleep_range(20, 30);
- goto retry;
+ /*
+ * Virtqueue full. Retry submission from worker
+ * context as we might be holding fc->bg_lock.
+ */
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->queued_reqs);
+ inc_in_flight_req(fsvq);
+ schedule_delayed_work(&fsvq->dispatch_work,
+ msecs_to_jiffies(1));
+ spin_unlock(&fsvq->lock);
+ return;
}
req->out.h.error = ret;
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
- spin_lock(&fpq->lock);
- clear_bit(FR_SENT, &req->flags);
- list_del_init(&req->list);
- spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
+
+ /* Can't end request in submission context. Use a worker */
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->end_reqs);
+ schedule_delayed_work(&fsvq->dispatch_work, 0);
+ spin_unlock(&fsvq->lock);
return;
}
}
-const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
+static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
.wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock,
.wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock,
.wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock,
@@@ -1050,7 -992,6 +1050,7 @@@ static int virtio_fs_fill_super(struct
.destroy = true,
.no_control = true,
.no_force_umount = true,
+ .no_mount_options = true,
};
mutex_lock(&virtio_fs_mutex);
diff --combined include/linux/filter.h
index 1b1e8b8f88da,fa2d921d7a22..c3fefb85eb5f
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@@ -65,9 -65,6 +65,9 @@@ struct ctl_table_header
/* unused opcode to mark special call to bpf_tail_call() helper */
#define BPF_TAIL_CALL 0xf0
+/* unused opcode to mark special load instruction. Same as BPF_ABS */
+#define BPF_PROBE_MEM 0x20
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@@ -420,7 -417,7 +420,7 @@@ static inline bool insn_is_zext(const s
#define BPF_FIELD_SIZEOF(type, field) \
({ \
- const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \
+ const int __size = bytes_to_bpf_size(sizeof_member(type, field)); \
BUILD_BUG_ON(__size < 0); \
__size; \
})
@@@ -467,11 -464,10 +467,11 @@@
#define BPF_CALL_x(x, name, ...) \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
+ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \
u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \
{ \
- return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
+ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
} \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
@@@ -497,7 -493,7 +497,7 @@@
#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \
({ \
- BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \
+ BUILD_BUG_ON(sizeof_member(TYPE, MEMBER) != (SIZE)); \
*(PTR_SIZE) = (SIZE); \
offsetof(TYPE, MEMBER); \
})
@@@ -515,12 -511,10 +515,12 @@@ struct sock_fprog_kern
struct sock_filter *filter;
};
+/* Some arches need doubleword alignment for their instructions and/or data */
+#define BPF_IMAGE_ALIGNMENT 8
+
struct bpf_binary_header {
u32 pages;
- /* Some arches need word alignment for their instructions */
- u8 image[] __aligned(4);
+ u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
};
struct bpf_prog {
@@@ -608,7 -602,7 +608,7 @@@ static inline void bpf_compute_data_poi
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
- BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_member(struct sk_buff, cb));
cb->data_meta = skb->data - skb_metadata_len(skb);
cb->data_end = skb->data + skb_headlen(skb);
}
@@@ -646,9 -640,9 +646,9 @@@ static inline u8 *bpf_skb_cb(struct sk_
* attached to sockets, we need to clear the bpf_skb_cb() area
* to not leak previous contents to user space.
*/
- BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN);
- BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) !=
- FIELD_SIZEOF(struct qdisc_skb_cb, data));
+ BUILD_BUG_ON(sizeof_member(struct __sk_buff, cb) != BPF_SKB_CB_LEN);
+ BUILD_BUG_ON(sizeof_member(struct __sk_buff, cb) !=
+ sizeof_member(struct qdisc_skb_cb, data));
return qdisc_skb_cb(skb)->data;
}
@@@ -952,9 -946,6 +952,9 @@@ void *bpf_jit_alloc_exec(unsigned long
void bpf_jit_free_exec(void *addr);
void bpf_jit_free(struct bpf_prog *fp);
+int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
+ struct bpf_jit_poke_descriptor *poke);
+
int bpf_jit_get_func_addr(const struct bpf_prog *prog,
const struct bpf_insn *insn, bool extra_pass,
u64 *func_addr, bool *func_addr_fixed);
@@@ -1053,23 -1044,11 +1053,23 @@@ static inline bool ebpf_jit_enabled(voi
return false;
}
+static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
+{
+ return false;
+}
+
static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
{
return false;
}
+static inline int
+bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
+ struct bpf_jit_poke_descriptor *poke)
+{
+ return -ENOTSUPP;
+}
+
static inline void bpf_jit_free(struct bpf_prog *fp)
{
bpf_prog_unlock_free(fp);
@@@ -1120,6 -1099,7 +1120,6 @@@ static inline void bpf_get_prog_name(co
#endif /* CONFIG_BPF_JIT */
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
#define BPF_ANC BIT(15)
diff --combined include/linux/kvm_host.h
index 7ed1e2f8641e,ebbf4f1a221a..6c626d5455cc
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@@ -149,7 -149,7 +149,7 @@@ static inline bool is_error_page(struc
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
- BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) -
KVM_REQUEST_ARCH_BASE); \
+ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_member(struct kvm_vcpu, requests) * 8) -
KVM_REQUEST_ARCH_BASE); \
(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
})
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
@@@ -266,8 -266,7 +266,8 @@@ struct kvm_vcpu
struct preempt_notifier preempt_notifier;
#endif
int cpu;
- int vcpu_id;
+ int vcpu_id; /* id given by userspace at creation */
+ int vcpu_idx; /* index in kvm->vcpus array */
int srcu_idx;
int mode;
u64 requests;
@@@ -279,6 -278,7 +279,6 @@@
struct mutex mutex;
struct kvm_run *run;
- int guest_xcr0_loaded;
struct swait_queue_head wq;
struct pid __rcu *pid;
int sigset_active;
@@@ -571,7 -571,13 +571,7 @@@ static inline struct kvm_vcpu *kvm_get_
static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *tmp;
- int idx;
-
- kvm_for_each_vcpu(idx, tmp, vcpu->kvm)
- if (tmp == vcpu)
- return idx;
- BUG();
+ return vcpu->vcpu_idx;
}
#define kvm_for_each_memslot(memslot, slots) \
@@@ -616,7 -622,6 +616,7 @@@ void kvm_exit(void)
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
+void kvm_put_kvm_no_destroy(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
@@@ -741,28 -746,6 +741,28 @@@ int kvm_write_guest_offset_cached(struc
unsigned long len);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len);
+
+#define __kvm_put_guest(kvm, gfn, offset, value, type) \
+({ \
+ unsigned long __addr = gfn_to_hva(kvm, gfn); \
+ type __user *__uaddr = (type __user *)(__addr + offset); \
+ int __ret = -EFAULT; \
+ \
+ if (!kvm_is_error_hva(__addr)) \
+ __ret = put_user(value, __uaddr); \
+ if (!__ret) \
+ mark_page_dirty(kvm, gfn); \
+ __ret; \
+})
+
+#define kvm_put_guest(kvm, gpa, value, type) \
+({ \
+ gpa_t __gpa = gpa; \
+ struct kvm *__kvm = kvm; \
+ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \
+ offset_in_page(__gpa), (value), type); \
+})
+
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
@@@ -808,8 -791,6 +808,8 @@@ void kvm_reload_remote_mmus(struct kvm
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
unsigned long *vcpu_bitmap, cpumask_var_t tmp);
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
+ unsigned long *vcpu_bitmap);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@@ -985,7 -966,6 +985,7 @@@ int kvm_cpu_has_pending_timer(struct kv
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
@@@ -1260,7 -1240,7 +1260,7 @@@ extern unsigned int halt_poll_ns_grow_s
extern unsigned int halt_poll_ns_shrink;
struct kvm_device {
- struct kvm_device_ops *ops;
+ const struct kvm_device_ops *ops;
struct kvm *kvm;
void *private;
struct list_head vm_node;
@@@ -1313,7 -1293,7 +1313,7 @@@ struct kvm_device_ops
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
@@@ -1402,10 -1382,4 +1402,10 @@@ static inline int kvm_arch_vcpu_run_pid
}
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
+typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr);
+
#endif
diff --combined include/linux/slab.h
index 4d2a2fa55ed5,2afb630bce2a..6d52953dbdf0
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@@ -179,7 -179,7 +179,7 @@@ void memcg_deactivate_kmem_caches(struc
sizeof(struct __struct), \
__alignof__(struct __struct), (__flags), \
offsetof(struct __struct, __field), \
- sizeof_field(struct __struct, __field), NULL)
+ sizeof_member(struct __struct, __field), NULL)
/*
* Common kmalloc functions provided by all allocators
@@@ -493,10 -493,6 +493,10 @@@ static __always_inline void *kmalloc_la
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
*
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. For @size of power of two bytes, the alignment is also guaranteed
+ * to be at least to the size.
+ *
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp.h and described at
* :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
diff --combined include/net/sock.h
index 87d54ef57f00,6a5a386410ff..d036017e6b8b
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@@ -66,6 -66,7 +66,6 @@@
#include <net/checksum.h>
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
-#include <net/smc.h>
#include <net/l3mdev.h>
/*
@@@ -859,17 -860,17 +859,17 @@@ static inline gfp_t sk_gfp_mask(const s
static inline void sk_acceptq_removed(struct sock *sk)
{
- sk->sk_ack_backlog--;
+ WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1);
}
static inline void sk_acceptq_added(struct sock *sk)
{
- sk->sk_ack_backlog++;
+ WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
}
static inline bool sk_acceptq_is_full(const struct sock *sk)
{
- return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
+ return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
}
/*
@@@ -877,17 -878,12 +877,17 @@@
*/
static inline int sk_stream_min_wspace(const struct sock *sk)
{
- return sk->sk_wmem_queued >> 1;
+ return READ_ONCE(sk->sk_wmem_queued) >> 1;
}
static inline int sk_stream_wspace(const struct sock *sk)
{
- return sk->sk_sndbuf - sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
+}
+
+static inline void sk_wmem_queued_add(struct sock *sk, int val)
+{
+ WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}
void sk_stream_write_space(struct sock *sk);
@@@ -899,11 -895,11 +899,11 @@@ static inline void __sk_add_backlog(str
skb_dst_force(skb);
if (!sk->sk_backlog.tail)
- sk->sk_backlog.head = skb;
+ WRITE_ONCE(sk->sk_backlog.head, skb);
else
sk->sk_backlog.tail->next = skb;
- sk->sk_backlog.tail = skb;
+ WRITE_ONCE(sk->sk_backlog.tail, skb);
skb->next = NULL;
}
@@@ -953,8 -949,8 +953,8 @@@ static inline void sk_incoming_cpu_upda
{
int cpu = raw_smp_processor_id();
- if (unlikely(sk->sk_incoming_cpu != cpu))
- sk->sk_incoming_cpu = cpu;
+ if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
+ WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
static inline void sock_rps_record_flow_hash(__u32 hash)
@@@ -1211,7 -1207,7 +1211,7 @@@ static inline void sk_refcnt_debug_rele
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
- if (sk->sk_wmem_queued >= sk->sk_sndbuf)
+ if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
return false;
return sk->sk_prot->stream_memory_free ?
@@@ -1471,7 -1467,7 +1471,7 @@@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cac
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- sk->sk_wmem_queued -= skb->truesize;
+ sk_wmem_queued_add(sk, -skb->truesize);
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
@@@ -1488,7 -1484,7 +1488,7 @@@ static inline void sock_release_ownersh
sk->sk_lock.owned = 0;
/* The sk_lock has mutex_unlock() semantics: */
- mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
}
}
@@@ -1939,8 -1935,8 +1939,8 @@@ struct dst_entry *sk_dst_check(struct s
static inline void sk_dst_confirm(struct sock *sk)
{
- if (!sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 1;
+ if (!READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 1);
}
static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
@@@ -1950,10 -1946,10 +1950,10 @@@
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
- if (sk && sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 0;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
+ if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
}
}
@@@ -2018,7 -2014,7 +2018,7 @@@ static inline int skb_copy_to_page_noca
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
}
@@@ -2224,14 -2220,10 +2224,14 @@@ static inline void sk_wake_async(const
static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
- if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
- sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
- sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
- }
+ u32 val;
+
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ return;
+
+ val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
+
+ WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
}
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
@@@ -2241,17 -2233,12 +2241,17 @@@
* sk_page_frag - return an appropriate page_frag
* @sk: socket
*
- * If socket allocation mode allows current thread to sleep, it means its
- * safe to use the per task page_frag instead of the per socket one.
+ * Use the per task page_frag instead of the per socket one for
+ * optimization when we know that we're in the normal context and owns
+ * everything that's associated with %current.
+ *
+ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+ * inside other socket operations and end up recursing into sk_page_frag()
+ * while it's already in use.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
- if (gfpflags_allow_blocking(sk->sk_allocation))
+ if (gfpflags_normal_context(sk->sk_allocation))
return &current->task_frag;
return &sk->sk_frag;
@@@ -2264,7 -2251,7 +2264,7 @@@ bool sk_page_frag_refill(struct sock *s
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+ return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
}
static inline gfp_t gfp_any(void)
@@@ -2284,9 -2271,7 +2284,9 @@@ static inline long sock_sndtimeo(const
static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
{
- return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
+ int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len);
+
+ return v ?: 1;
}
/* Alas, with timeout socket operations are not restartable.
@@@ -2305,7 -2290,7 +2305,7 @@@ struct sock_skb_cb
* using skb->cb[] would keep using it directly and utilize its
* alignement guarantee.
*/
- #define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
+ #define SOCK_SKB_CB_OFFSET ((sizeof_member(struct sk_buff, cb) - \
sizeof(struct sock_skb_cb)))
#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
@@@ -2341,7 -2326,7 +2341,7 @@@ static inline ktime_t sock_read_timesta
return kt;
#else
- return sk->sk_stamp;
+ return READ_ONCE(sk->sk_stamp);
#endif
}
@@@ -2352,7 -2337,7 +2352,7 @@@ static inline void sock_write_timestamp
sk->sk_stamp = kt;
write_sequnlock(&sk->sk_stamp_seq);
#else
- sk->sk_stamp = kt;
+ WRITE_ONCE(sk->sk_stamp, kt);
#endif
}
@@@ -2527,7 -2512,7 +2527,7 @@@ static inline bool sk_listener(const st
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
-void sock_enable_timestamp(struct sock *sk, int flag);
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);
diff --combined kernel/bpf/cgroup.c
index 9f90d3c92bda,3ff6df3ab022..b0adc5e83638
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@@ -180,8 -180,8 +180,8 @@@ static void activate_effective_progs(st
enum bpf_attach_type type,
struct bpf_prog_array *old_array)
{
- rcu_swap_protected(cgrp->bpf.effective[type], old_array,
- lockdep_is_held(&cgroup_mutex));
+ old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
+ lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
*/
@@@ -1311,12 -1311,12 +1311,12 @@@ static bool sysctl_is_valid_access(int
return false;
switch (off) {
- case offsetof(struct bpf_sysctl, write):
+ case bpf_ctx_range(struct bpf_sysctl, write):
if (type != BPF_READ)
return false;
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
- case offsetof(struct bpf_sysctl, file_pos):
+ case bpf_ctx_range(struct bpf_sysctl, file_pos):
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
@@@ -1341,7 -1341,7 +1341,7 @@@ static u32 sysctl_convert_ctx_access(en
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sysctl_kern, write,
- FIELD_SIZEOF(struct bpf_sysctl_kern,
+ sizeof_member(struct bpf_sysctl_kern,
write),
target_size));
break;
diff --combined kernel/bpf/local_storage.c
index 2ba750725cb2,ea4117fabdec..e13992995a05
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@@ -357,7 -357,7 +357,7 @@@ static int cgroup_storage_check_btf(con
* The first field must be a 64 bit integer at 0 offset.
*/
m = (struct btf_member *)(key_type + 1);
- size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id);
+ size = sizeof_member(struct bpf_cgroup_storage_key, cgroup_inode_id);
if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
return -EINVAL;
@@@ -366,7 -366,7 +366,7 @@@
*/
m++;
offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
- size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type);
+ size = sizeof_member(struct bpf_cgroup_storage_key, attach_type);
if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
return -EINVAL;
@@@ -569,7 -569,7 +569,7 @@@ void bpf_cgroup_storage_link(struct bpf
return;
storage->key.attach_type = type;
- storage->key.cgroup_inode_id = cgroup->kn->id.id;
+ storage->key.cgroup_inode_id = cgroup_id(cgroup);
map = storage->map;
diff --combined kernel/fork.c
index 2a89887e7ade,1b45b28d4bf5..acdf70a917ed
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@@ -1283,8 -1283,24 +1283,8 @@@ static int wait_for_vfork_done(struct t
* restoring the old one. . .
* Eric Biederman 10 January 1998
*/
-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
- /* Get rid of any futexes when releasing the mm */
-#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list)) {
- exit_robust_list(tsk);
- tsk->robust_list = NULL;
- }
-#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list)) {
- compat_exit_robust_list(tsk);
- tsk->compat_robust_list = NULL;
- }
-#endif
- if (unlikely(!list_empty(&tsk->pi_state_list)))
- exit_pi_state_list(tsk);
-#endif
-
uprobe_free_utask(tsk);
/* Get rid of any cached register state */
@@@ -1317,18 -1333,6 +1317,18 @@@
complete_vfork_done(tsk);
}
+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ futex_exit_release(tsk);
+ mm_release(tsk, mm);
+}
+
+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ futex_exec_release(tsk);
+ mm_release(tsk, mm);
+}
+
/**
* dup_mm() - duplicates an existing mm structure
* @tsk: the task_struct with which the new mm will be associated.
@@@ -1513,11 -1517,6 +1513,11 @@@ static int copy_sighand(unsigned long c
spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
spin_unlock_irq(&current->sighand->siglock);
+
+ /* Reset all signal handler not set to SIG_IGN to SIG_DFL. */
+ if (clone_flags & CLONE_CLEAR_SIGHAND)
+ flush_signal_handlers(tsk, 0);
+
return 0;
}
@@@ -1696,68 -1695,12 +1696,68 @@@ static int pidfd_release(struct inode *
}
#ifdef CONFIG_PROC_FS
+/**
+ * pidfd_show_fdinfo - print information about a pidfd
+ * @m: proc fdinfo file
+ * @f: file referencing a pidfd
+ *
+ * Pid:
+ * This function will print the pid that a given pidfd refers to in the
+ * pid namespace of the procfs instance.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its pid. This is
+ * similar to calling getppid() on a process whose parent is outside of
+ * its pid namespace.
+ *
+ * NSpid:
+ * If pid namespaces are supported then this function will also print
+ * the pid of a given pidfd refers to for all descendant pid namespaces
+ * starting from the current pid namespace of the instance, i.e. the
+ * Pid field and the first entry in the NSpid field will be identical.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its first NSpid
+ * entry and no others will be shown.
+ * Note that this differs from the Pid and NSpid fields in
+ * /proc/<pid>/status where Pid and NSpid are always shown relative to
+ * the pid namespace of the procfs instance. The difference becomes
+ * obvious when sending around a pidfd between pid namespaces from a
+ * different branch of the tree, i.e. where no ancestoral relation is
+ * present between the pid namespaces:
+ * - create two new pid namespaces ns1 and ns2 in the initial pid
+ * namespace (also take care to create new mount namespaces in the
+ * new pid namespace and mount procfs)
+ * - create a process with a pidfd in ns1
+ * - send pidfd from ns1 to ns2
+ * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
+ * have exactly one entry, which is 0
+ */
static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
{
- struct pid_namespace *ns = proc_pid_ns(file_inode(m->file));
struct pid *pid = f->private_data;
+ struct pid_namespace *ns;
+ pid_t nr = -1;
+
+ if (likely(pid_has_task(pid, PIDTYPE_PID))) {
+ ns = proc_pid_ns(file_inode(m->file));
+ nr = pid_nr_ns(pid, ns);
+ }
+
+ seq_put_decimal_ll(m, "Pid:\t", nr);
+
+#ifdef CONFIG_PID_NS
+ seq_put_decimal_ll(m, "\nNSpid:\t", nr);
+ if (nr > 0) {
+ int i;
- seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+ /* If nr is non-zero it means that 'pid' is valid and that
+ * ns, i.e. the pid namespace associated with the procfs
+ * instance, is in the pid namespace hierarchy of pid.
+ * Start at one below the already printed level.
+ */
+ for (i = ns->level + 1; i <= pid->level; i++)
+ seq_put_decimal_ll(m, "\t", pid->numbers[i].nr);
+ }
+#endif
seq_putc(m, '\n');
}
#endif
@@@ -1765,11 -1708,11 +1765,11 @@@
/*
* Poll support for process exit notification.
*/
-static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
+static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct task_struct *task;
struct pid *pid = file->private_data;
- int poll_flags = 0;
+ __poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
@@@ -1781,7 -1724,7 +1781,7 @@@
* group, then poll(2) should block, similar to the wait(2) family.
*/
if (!task || (task->exit_state && thread_group_empty(task)))
- poll_flags = POLLIN | POLLRDNORM;
+ poll_flags = EPOLLIN | EPOLLRDNORM;
rcu_read_unlock();
return poll_flags;
@@@ -2083,8 -2026,7 +2083,8 @@@ static __latent_entropy struct task_str
stackleak_task_init(p);
if (pid != &init_struct_pid) {
- pid = alloc_pid(p->nsproxy->pid_ns_for_children);
+ pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
+ args->set_tid_size);
if (IS_ERR(pid)) {
retval = PTR_ERR(pid);
goto bad_fork_cleanup_thread;
@@@ -2120,8 -2062,14 +2120,8 @@@
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
-#ifdef CONFIG_FUTEX
- p->robust_list = NULL;
-#ifdef CONFIG_COMPAT
- p->compat_robust_list = NULL;
-#endif
- INIT_LIST_HEAD(&p->pi_state_list);
- p->pi_state_cache = NULL;
-#endif
+ futex_init_task(p);
+
/*
* sigaltstack should be cleared when sharing the same VM
*/
@@@ -2182,7 -2130,7 +2182,7 @@@
*/
p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boottime_ns();
+ p->start_boottime = ktime_get_boottime_ns();
/*
* Make it visible to the rest of the system, but dont wake it up yet.
@@@ -2581,7 -2529,6 +2581,7 @@@ noinline static int copy_clone_args_fro
{
int err;
struct clone_args args;
+ pid_t *kset_tid = kargs->set_tid;
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
@@@ -2592,15 -2539,6 +2592,15 @@@
if (err)
return err;
+ if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL))
+ return -EINVAL;
+
+ if (unlikely(!args.set_tid && args.set_tid_size > 0))
+ return -EINVAL;
+
+ if (unlikely(args.set_tid && args.set_tid_size == 0))
+ return -EINVAL;
+
/*
* Verify that higher 32bits of exit_signal are unset and that
* it is a valid signal
@@@ -2618,51 -2556,18 +2618,51 @@@
.stack = args.stack,
.stack_size = args.stack_size,
.tls = args.tls,
+ .set_tid_size = args.set_tid_size,
};
+ if (args.set_tid &&
+ copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid),
+ (kargs->set_tid_size * sizeof(pid_t))))
+ return -EFAULT;
+
+ kargs->set_tid = kset_tid;
+
return 0;
}
-static bool clone3_args_valid(const struct kernel_clone_args *kargs)
+/**
+ * clone3_stack_valid - check and prepare stack
+ * @kargs: kernel clone args
+ *
+ * Verify that the stack arguments userspace gave us are sane.
+ * In addition, set the stack direction for userspace since it's easy for us to
+ * determine.
+ */
+static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
{
- /*
- * All lower bits of the flag word are taken.
- * Verify that no other unknown flags are passed along.
- */
- if (kargs->flags & ~CLONE_LEGACY_FLAGS)
+ if (kargs->stack == 0) {
+ if (kargs->stack_size > 0)
+ return false;
+ } else {
+ if (kargs->stack_size == 0)
+ return false;
+
+ if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
+ return false;
+
+#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
+ kargs->stack += kargs->stack_size;
+#endif
+ }
+
+ return true;
+}
+
+static bool clone3_args_valid(struct kernel_clone_args *kargs)
+{
+ /* Verify that no unknown flags are passed along. */
+ if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND))
return false;
/*
@@@ -2672,17 -2577,10 +2672,17 @@@
if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
return false;
+ if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
+ (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND))
+ return false;
+
if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
kargs->exit_signal)
return false;
+ if (!clone3_stack_valid(kargs))
+ return false;
+
return true;
}
@@@ -2702,9 -2600,6 +2702,9 @@@ SYSCALL_DEFINE2(clone3, struct clone_ar
int err;
struct kernel_clone_args kargs;
+ pid_t set_tid[MAX_PID_NS_LEVEL];
+
+ kargs.set_tid = set_tid;
err = copy_clone_args_from_user(&kargs, uargs, size);
if (err)
@@@ -2793,7 -2688,7 +2793,7 @@@ void __init proc_caches_init(void
mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
offsetof(struct mm_struct, saved_auxv),
- sizeof_field(struct mm_struct, saved_auxv),
+ sizeof_member(struct mm_struct, saved_auxv),
NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
mmap_init();
@@@ -3030,7 -2925,7 +3030,7 @@@ int sysctl_max_threads(struct ctl_tabl
struct ctl_table t;
int ret;
int threads = max_threads;
- int min = MIN_THREADS;
+ int min = 1;
int max = MAX_THREADS;
t = *table;
@@@ -3042,7 -2937,7 +3042,7 @@@
if (ret || !write)
return ret;
- set_max_threads(threads);
+ max_threads = threads;
return 0;
}
diff --combined kernel/signal.c
index bcd46f547db3,52628a975705..4c1a171eb68f
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@@ -2205,8 -2205,8 +2205,8 @@@ static void ptrace_stop(int exit_code,
*/
preempt_disable();
read_unlock(&tasklist_lock);
- preempt_enable_no_resched();
cgroup_enter_frozen();
+ preempt_enable_no_resched();
freezable_schedule();
cgroup_leave_frozen(true);
} else {
@@@ -4548,11 -4548,11 +4548,11 @@@ static inline void siginfo_buildtime_ch
BUILD_BUG_ON(offsetof(struct siginfo, si_pid) !=
offsetof(struct siginfo, si_addr));
if (sizeof(int) == sizeof(void __user *)) {
- BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) !=
+ BUILD_BUG_ON(sizeof_member(struct siginfo, si_pid) !=
sizeof(void __user *));
} else {
- BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) +
- sizeof_field(struct siginfo, si_uid)) !=
+ BUILD_BUG_ON((sizeof_member(struct siginfo, si_pid) +
+ sizeof_member(struct siginfo, si_uid)) !=
sizeof(void __user *));
BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) !=
offsetof(struct siginfo, si_uid));
@@@ -4560,10 -4560,10 +4560,10 @@@
#ifdef CONFIG_COMPAT
BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) !=
offsetof(struct compat_siginfo, si_addr));
- BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) !=
+ BUILD_BUG_ON(sizeof_member(struct compat_siginfo, si_pid) !=
sizeof(compat_uptr_t));
- BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) !=
- sizeof_field(struct siginfo, si_pid));
+ BUILD_BUG_ON(sizeof_member(struct compat_siginfo, si_pid) !=
+ sizeof_member(struct siginfo, si_pid));
#endif
}
diff --combined net/caif/caif_socket.c
index ef14da50a981,0dbd93bb1939..61bd13d3d637
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@@ -953,7 -953,7 +953,7 @@@ static __poll_t caif_poll(struct file *
mask |= EPOLLRDHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
mask |= EPOLLIN | EPOLLRDNORM;
@@@ -1033,7 -1033,7 +1033,7 @@@ static int caif_create(struct net *net
.owner = THIS_MODULE,
.obj_size = sizeof(struct caifsock),
.useroffset = offsetof(struct caifsock, conn_req.param),
- .usersize = sizeof_field(struct caifsock, conn_req.param)
+ .usersize = sizeof_member(struct caifsock, conn_req.param)
};
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
diff --combined net/core/dev.c
index 46580b290450,c434e94167ca..d75fd04d4e2c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@@ -146,7 -146,6 +146,7 @@@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
+#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@@ -229,122 -228,6 +229,122 @@@ static inline void rps_unlock(struct so
#endif
}
+static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
+ const char *name)
+{
+ struct netdev_name_node *name_node;
+
+ name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
+ if (!name_node)
+ return NULL;
+ INIT_HLIST_NODE(&name_node->hlist);
+ name_node->dev = dev;
+ name_node->name = name;
+ return name_node;
+}
+
+static struct netdev_name_node *
+netdev_name_node_head_alloc(struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+
+ name_node = netdev_name_node_alloc(dev, dev->name);
+ if (!name_node)
+ return NULL;
+ INIT_LIST_HEAD(&name_node->list);
+ return name_node;
+}
+
+static void netdev_name_node_free(struct netdev_name_node *name_node)
+{
+ kfree(name_node);
+}
+
+static void netdev_name_node_add(struct net *net,
+ struct netdev_name_node *name_node)
+{
+ hlist_add_head_rcu(&name_node->hlist,
+ dev_name_hash(net, name_node->name));
+}
+
+static void netdev_name_node_del(struct netdev_name_node *name_node)
+{
+ hlist_del_rcu(&name_node->hlist);
+}
+
+static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
+ const char *name)
+{
+ struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *name_node;
+
+ hlist_for_each_entry(name_node, head, hlist)
+ if (!strcmp(name_node->name, name))
+ return name_node;
+ return NULL;
+}
+
+static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
+ const char *name)
+{
+ struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *name_node;
+
+ hlist_for_each_entry_rcu(name_node, head, hlist)
+ if (!strcmp(name_node->name, name))
+ return name_node;
+ return NULL;
+}
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name)
+{
+ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ name_node = netdev_name_node_lookup(net, name);
+ if (name_node)
+ return -EEXIST;
+ name_node = netdev_name_node_alloc(dev, name);
+ if (!name_node)
+ return -ENOMEM;
+ netdev_name_node_add(net, name_node);
+ /* The node that holds dev->name acts as a head of per-device list. */
+ list_add_tail(&name_node->list, &dev->name_node->list);
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_create);
+
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+ list_del(&name_node->list);
+ netdev_name_node_del(name_node);
+ kfree(name_node->name);
+ netdev_name_node_free(name_node);
+}
+
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
+{
+ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ name_node = netdev_name_node_lookup(net, name);
+ if (!name_node)
+ return -ENOENT;
+ __netdev_name_node_alt_destroy(name_node);
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_destroy);
+
+static void netdev_name_node_alt_flush(struct net_device *dev)
+{
+ struct netdev_name_node *name_node, *tmp;
+
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
+ __netdev_name_node_alt_destroy(name_node);
+}
+
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
@@@ -354,7 -237,7 +354,7 @@@
write_lock_bh(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
- hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
@@@ -372,7 -255,7 +372,7 @@@ static void unlist_netdevice(struct net
/* Unlink dev from the device chain */
write_lock_bh(&dev_base_lock);
list_del_rcu(&dev->dev_list);
- hlist_del_rcu(&dev->name_hlist);
+ netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
@@@ -393,6 -276,88 +393,6 @@@ static RAW_NOTIFIER_HEAD(netdev_chain)
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
-#ifdef CONFIG_LOCKDEP
-/*
- * register_netdevice() inits txq->_xmit_lock and sets lockdep class
- * according to dev->type
- */
-static const unsigned short netdev_lock_type[] = {
- ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
- ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
- ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
- ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
- ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
- ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
- ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
- ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
- ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
- ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
- ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
- ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
- ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
- ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
- ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
-
-static const char *const netdev_lock_name[] = {
- "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
- "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
- "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
- "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
- "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
- "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
- "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
- "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
- "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
- "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
- "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
- "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
- "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
- "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
- "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
-
-static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
-static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
-
-static inline unsigned short netdev_lock_pos(unsigned short dev_type)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
- if (netdev_lock_type[i] == dev_type)
- return i;
- /* the last key is used by default */
- return ARRAY_SIZE(netdev_lock_type) - 1;
-}
-
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
- unsigned short dev_type)
-{
- int i;
-
- i = netdev_lock_pos(dev_type);
- lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
- netdev_lock_name[i]);
-}
-
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
- int i;
-
- i = netdev_lock_pos(dev->type);
- lockdep_set_class_and_name(&dev->addr_list_lock,
- &netdev_addr_lock_key[i],
- netdev_lock_name[i]);
-}
-#else
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
- unsigned short dev_type)
-{
-}
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
-}
-#endif
-
/*******************************************************************************
*
* Protocol management and registration routines
@@@ -768,10 -733,14 +768,10 @@@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
- struct net_device *dev;
- struct hlist_head *head = dev_name_hash(net, name);
-
- hlist_for_each_entry(dev, head, name_hlist)
- if (!strncmp(dev->name, name, IFNAMSIZ))
- return dev;
+ struct netdev_name_node *node_name;
- return NULL;
+ node_name = netdev_name_node_lookup(net, name);
+ return node_name ? node_name->dev : NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);
@@@ -789,10 -758,14 +789,10 @@@
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
- struct net_device *dev;
- struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *node_name;
- hlist_for_each_entry_rcu(dev, head, name_hlist)
- if (!strncmp(dev->name, name, IFNAMSIZ))
- return dev;
-
- return NULL;
+ node_name = netdev_name_node_lookup_rcu(net, name);
+ return node_name ? node_name->dev : NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);
@@@ -1168,8 -1141,8 +1168,8 @@@ int dev_alloc_name(struct net_device *d
}
EXPORT_SYMBOL(dev_alloc_name);
-int dev_get_valid_name(struct net *net, struct net_device *dev,
- const char *name)
+static int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@@ -1185,6 -1158,7 +1185,6 @@@
return 0;
}
-EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@@ -1258,13 -1232,13 +1258,13 @@@ rollback
netdev_adjacent_rename_links(dev, oldname);
write_lock_bh(&dev_base_lock);
- hlist_del_rcu(&dev->name_hlist);
+ netdev_name_node_del(dev->name_node);
write_unlock_bh(&dev_base_lock);
synchronize_rcu();
write_lock_bh(&dev_base_lock);
- hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ netdev_name_node_add(net, dev->name_node);
write_unlock_bh(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@@ -1314,8 -1288,8 +1314,8 @@@ int dev_set_alias(struct net_device *de
}
mutex_lock(&ifalias_mutex);
- rcu_swap_protected(dev->ifalias, new_alias,
- mutex_is_locked(&ifalias_mutex));
+ new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
mutex_unlock(&ifalias_mutex);
if (new_alias)
@@@ -1643,62 -1617,6 +1643,62 @@@ static int call_netdevice_notifier(stru
return nb->notifier_call(nb, val, &info);
}
+static int call_netdevice_register_notifiers(struct notifier_block *nb,
+ struct net_device *dev)
+{
+ int err;
+
+ err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ return err;
+
+ if (!(dev->flags & IFF_UP))
+ return 0;
+
+ call_netdevice_notifier(nb, NETDEV_UP, dev);
+ return 0;
+}
+
+static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
+ struct net_device *dev)
+{
+ if (dev->flags & IFF_UP) {
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
+ }
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
+}
+
+static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
+ struct net *net)
+{
+ struct net_device *dev;
+ int err;
+
+ for_each_netdev(net, dev) {
+ err = call_netdevice_register_notifiers(nb, dev);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ for_each_netdev_continue_reverse(net, dev)
+ call_netdevice_unregister_notifiers(nb, dev);
+ return err;
+}
+
+static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
+ struct net *net)
+{
+ struct net_device *dev;
+
+ for_each_netdev(net, dev)
+ call_netdevice_unregister_notifiers(nb, dev);
+}
+
static int dev_boot_phase = 1;
/**
@@@ -1717,6 -1635,8 +1717,6 @@@
int register_netdevice_notifier(struct notifier_block *nb)
{
- struct net_device *dev;
- struct net_device *last;
struct net *net;
int err;
@@@ -1729,9 -1649,17 +1729,9 @@@
if (dev_boot_phase)
goto unlock;
for_each_net(net) {
- for_each_netdev(net, dev) {
- err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
- err = notifier_to_errno(err);
- if (err)
- goto rollback;
-
- if (!(dev->flags & IFF_UP))
- continue;
-
- call_netdevice_notifier(nb, NETDEV_UP, dev);
- }
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err)
+ goto rollback;
}
unlock:
@@@ -1740,9 -1668,22 +1740,9 @@@
return err;
rollback:
- last = dev;
- for_each_net(net) {
- for_each_netdev(net, dev) {
- if (dev == last)
- goto outroll;
-
- if (dev->flags & IFF_UP) {
- call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
- dev);
- call_netdevice_notifier(nb, NETDEV_DOWN, dev);
- }
- call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
- }
- }
+ for_each_net_continue_reverse(net)
+ call_netdevice_unregister_net_notifiers(nb, net);
-outroll:
raw_notifier_chain_unregister(&netdev_chain, nb);
goto unlock;
}
@@@ -1792,80 -1733,6 +1792,80 @@@ unlock
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
+/**
+ * register_netdevice_notifier_net - register a per-netns network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur
+ * in @net. The notifier passed is linked into the per-netns chain
+ * (net->netdev_chain) and must not be reused until it has been
+ * unregistered. A negative errno code is returned on a failure.
+ *
+ * When registered all registration and up events are replayed
+ * to the new notifier to allow the notifier to have a race free
+ * view of the network device list. If the replay fails, the notifier
+ * is removed from the per-netns chain again before returning.
+ */
+
+int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_register(&net->netdev_chain, nb);
+ if (err)
+ goto unlock;
+ if (dev_boot_phase) /* no replay while dev_boot_phase is set */
+ goto unlock;
+
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err)
+ goto chain_unregister;
+
+unlock:
+ rtnl_unlock();
+ return err;
+
+chain_unregister:
+ /* Undo the registration on the same per-netns chain it was added to;
+ * the global netdev_chain never held this notifier.
+ */
+ raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ goto unlock;
+}
+EXPORT_SYMBOL(register_netdevice_notifier_net);
+
+/**
+ * unregister_netdevice_notifier_net - unregister a per-netns
+ * network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by
+ * register_netdevice_notifier_net(). The notifier is unlinked from the
+ * per-netns chain of @net and may then be reused. A negative errno code
+ * is returned on a failure.
+ *
+ * After unregistering, unregister and down device events are synthesized
+ * for all devices of @net to the removed notifier to remove
+ * the need for special case cleanup code.
+ */
+
+int unregister_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ if (err) /* nothing is synthesized if the chain removal failed */
+ goto unlock;
+
+ call_netdevice_unregister_net_notifiers(nb, net);
+
+unlock:
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+
/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
@@@ -1878,18 -1745,7 +1878,18 @@@
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info)
{
+ struct net *net = dev_net(info->dev);
+ int ret;
+
ASSERT_RTNL();
+
+ /* Run per-netns notifier block chain first, then run the global one.
+ * Hopefully, one day, the global one is going to be removed after
+ * all notifier block registrators get converted to be per-netns.
+ */
+ ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
+ if (ret & NOTIFY_STOP_MASK)
+ return ret;
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@@ -2915,7 -2771,7 +2915,7 @@@ static struct dev_kfree_skb_cb *get_kfr
void netif_schedule_queue(struct netdev_queue *txq)
{
rcu_read_lock();
- if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+ if (!netif_xmit_stopped(txq)) {
struct Qdisc *q = rcu_dereference(txq->qdisc);
__netif_schedule(q);
@@@ -3083,9 -2939,12 +3083,9 @@@ int skb_checksum_help(struct sk_buff *s
offset += skb->csum_offset;
BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(__sum16))) {
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (ret)
- goto out;
- }
+ ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
+ if (ret)
+ goto out;
*(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
out_set_summed:
@@@ -3120,11 -2979,12 +3120,11 @@@ int skb_crc32c_csum_help(struct sk_buf
ret = -EINVAL;
goto out;
}
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(__le32))) {
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (ret)
- goto out;
- }
+
+ ret = skb_ensure_writable(skb, offset + sizeof(__le32));
+ if (ret)
+ goto out;
+
crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
skb->len - start, ~(__u32)0,
crc32c_csum_stub));
@@@ -3607,7 -3467,7 +3607,7 @@@ static inline int __dev_xmit_skb(struc
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+ if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
qdisc_run_begin(q)) {
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
&q->state))) {
@@@ -5586,7 -5446,7 +5586,7 @@@ static struct list_head *gro_list_prepa
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
if (skb_vlan_tag_present(p))
- diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
@@@ -5611,7 -5471,8 +5611,7 @@@ static void skb_gro_reset_offset(struc
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
- if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
- pinfo->nr_frags &&
+ if (!skb_headlen(skb) && pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
@@@ -5802,26 -5663,6 +5802,26 @@@ struct packet_offload *gro_find_complet
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack and reset the
+ * batch (napi->rx_list / napi->rx_count) to empty. Does nothing when the
+ * batch count is zero.
+ */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count) /* nothing batched */
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list); /* start a fresh, empty batch */
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. Once the batch count
+ * reaches gro_normal_batch, pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch) /* count includes the SKB just queued */
+ gro_normal_list(napi);
+}
+
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
@@@ -5829,13 -5670,12 +5829,13 @@@
kmem_cache_free(skbuff_head_cache, skb);
}
-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(struct napi_struct *napi,
+ struct sk_buff *skb,
+ gro_result_t ret)
{
switch (ret) {
case GRO_NORMAL:
- if (netif_receive_skb_internal(skb))
- ret = GRO_DROP;
+ gro_normal_one(napi, skb);
break;
case GRO_DROP:
@@@ -5867,7 -5707,7 +5867,7 @@@ gro_result_t napi_gro_receive(struct na
skb_gro_reset_offset(skb);
- ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
trace_napi_gro_receive_exit(ret);
return ret;
@@@ -5913,6 -5753,26 +5913,6 @@@ struct sk_buff *napi_get_frags(struct n
}
EXPORT_SYMBOL(napi_get_frags);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static gro_result_t napi_frags_finish(struct napi_struct *napi,
struct sk_buff *skb,
gro_result_t ret)
@@@ -6629,9 -6489,6 +6629,9 @@@ struct netdev_adjacent
/* upper master flag, there can only be one master device per list */
bool master;
+ /* lookup ignore flag */
+ bool ignore;
+
/* counter for the number of times this device was added to us */
u16 ref_nr;
@@@ -6654,7 -6511,7 +6654,7 @@@ static struct netdev_adjacent *__netdev
return NULL;
}
-static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
{
struct net_device *dev = data;
@@@ -6675,7 -6532,7 +6675,7 @@@ bool netdev_has_upper_dev(struct net_de
{
ASSERT_RTNL();
- return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
upper_dev);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@@ -6693,7 -6550,7 +6693,7 @@@
bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
struct net_device *upper_dev)
{
- return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
upper_dev);
}
EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
@@@ -6737,22 -6594,6 +6737,22 @@@ struct net_device *netdev_master_upper_
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
+{ /* Return the device of the first entry on @dev's adj_list.upper when that
+ * entry has its master flag set and its ignore flag clear; NULL otherwise
+ * (including an empty list). Asserts that RTNL is held.
+ */
+ struct netdev_adjacent *upper;
+
+ ASSERT_RTNL();
+
+ if (list_empty(&dev->adj_list.upper))
+ return NULL;
+
+ upper = list_first_entry(&dev->adj_list.upper,
+ struct netdev_adjacent, list);
+ if (likely(upper->master) && !upper->ignore)
+ return upper->dev;
+ return NULL;
+}
+
/**
* netdev_has_any_lower_dev - Check if device is linked to some device
* @dev: device
@@@ -6803,23 -6644,6 +6803,23 @@@ struct net_device *netdev_upper_get_nex
}
EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
+ struct list_head **iter,
+ bool *ignore)
+{ /* Step *iter to the next entry of @dev's adj_list.upper and return that
+ * entry's device, storing its ignore flag in *ignore. Returns NULL, with
+ * *iter and *ignore left untouched, when the next node is the list head.
+ */
+ struct netdev_adjacent *upper;
+
+ upper = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->adj_list.upper)
+ return NULL;
+
+ *iter = &upper->list;
+ *ignore = upper->ignore;
+
+ return upper->dev;
+}
+
static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
struct list_head **iter)
{
@@@ -6837,111 -6661,34 +6837,111 @@@
return upper->dev;
}
+static int __netdev_walk_all_upper_dev(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{ /* Iteratively walk the upper devices above @dev, calling @fn on the
+ * devices reached and skipping adjacency entries whose ignore flag is set.
+ * The path back to @dev is kept in dev_stack/iter_stack instead of using
+ * recursion. Returns the first nonzero value from @fn, else 0.
+ * NOTE(review): @fn also runs again each time the walk pops back to a
+ * non-root device, so it may be called on the same device more than once.
+ */
+ struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
+ bool ignore;
+
+ now = dev;
+ iter = &dev->adj_list.upper;
+
+ while (1) {
+ if (now != dev) { /* @fn is skipped for the starting device */
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) { /* pick the next non-ignored upper device of @now */
+ udev = __netdev_next_upper_dev(now, &iter, &ignore);
+ if (!udev)
+ break;
+ if (ignore)
+ continue;
+
+ next = udev;
+ niter = &udev->adj_list.upper;
+ dev_stack[cur] = now; /* remember where to resume afterwards */
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) { /* @now has no further entries: pop, or finish if the stack is empty */
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return 0; /* not reached: the loop above only exits via return */
+}
+
int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
int (*fn)(struct net_device *dev,
void *data),
void *data)
{
- struct net_device *udev;
- struct list_head *iter;
- int ret;
+ struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
- for (iter = &dev->adj_list.upper,
- udev = netdev_next_upper_dev_rcu(dev, &iter);
- udev;
- udev = netdev_next_upper_dev_rcu(dev, &iter)) {
- /* first is the upper device itself */
- ret = fn(udev, data);
- if (ret)
- return ret;
+ now = dev;
+ iter = &dev->adj_list.upper;
- /* then look at all of its upper devices */
- ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
- if (ret)
- return ret;
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ udev = netdev_next_upper_dev_rcu(now, &iter);
+ if (!udev)
+ break;
+
+ next = udev;
+ niter = &udev->adj_list.upper;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
}
return 0;
}
EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
+static bool __netdev_has_upper_dev(struct net_device *dev,
+ struct net_device *upper_dev)
+{ /* Run __netdev_walk_all_upper_dev() over @dev with
+ * ____netdev_has_upper_dev as the callback and @upper_dev as its data; the
+ * walk's int result is converted to bool. Asserts that RTNL is held.
+ */
+ ASSERT_RTNL();
+
+ return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
+ upper_dev);
+}
+
/**
* netdev_lower_get_next_private - Get the next ->private from the
* lower neighbour list
@@@ -7038,119 -6785,34 +7038,119 @@@ static struct net_device *netdev_next_l
return lower->dev;
}
+static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
+ struct list_head **iter,
+ bool *ignore)
+{ /* Step *iter to the next entry of @dev's adj_list.lower and return that
+ * entry's device, storing its ignore flag in *ignore. Returns NULL, with
+ * *iter and *ignore left untouched, when the next node is the list head.
+ */
+ struct netdev_adjacent *lower;
+
+ lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+ *ignore = lower->ignore;
+
+ return lower->dev;
+}
+
int netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *dev,
void *data),
void *data)
{
- struct net_device *ldev;
- struct list_head *iter;
- int ret;
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
- for (iter = &dev->adj_list.lower,
- ldev = netdev_next_lower_dev(dev, &iter);
- ldev;
- ldev = netdev_next_lower_dev(dev, &iter)) {
- /* first is the lower device itself */
- ret = fn(ldev, data);
- if (ret)
- return ret;
+ now = dev;
+ iter = &dev->adj_list.lower;
- /* then look at all of its lower devices */
- ret = netdev_walk_all_lower_dev(ldev, fn, data);
- if (ret)
- return ret;
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev(now, &iter);
+ if (!ldev)
+ break;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
}
return 0;
}
EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
+static int __netdev_walk_all_lower_dev(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{ /* Iteratively walk the lower devices below @dev, calling @fn on the
+ * devices reached and skipping adjacency entries whose ignore flag is set.
+ * The path back to @dev is kept in dev_stack/iter_stack instead of using
+ * recursion. Returns the first nonzero value from @fn, else 0.
+ * NOTE(review): @fn also runs again each time the walk pops back to a
+ * non-root device, so it may be called on the same device more than once.
+ */
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
+ bool ignore;
+
+ now = dev;
+ iter = &dev->adj_list.lower;
+
+ while (1) {
+ if (now != dev) { /* @fn is skipped for the starting device */
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) { /* pick the next non-ignored lower device of @now */
+ ldev = __netdev_next_lower_dev(now, &iter, &ignore);
+ if (!ldev)
+ break;
+ if (ignore)
+ continue;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now; /* remember where to resume afterwards */
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) { /* @now has no further entries: pop, or finish if the stack is empty */
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return 0; /* not reached: the loop above only exits via return */
+}
+
static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
struct list_head **iter)
{
@@@ -7165,99 -6827,28 +7165,99 @@@
return lower->dev;
}
-int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
- int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+static u8 __netdev_upper_depth(struct net_device *dev)
+{ /* Return the largest upper_level found among the entries of @dev's
+ * adj_list.upper, skipping entries flagged ignore; 0 if none qualify.
+ */
+ struct net_device *udev;
+ struct list_head *iter;
+ u8 max_depth = 0;
+ bool ignore;
+
+ for (iter = &dev->adj_list.upper,
+ udev = __netdev_next_upper_dev(dev, &iter, &ignore);
+ udev;
+ udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
+ if (ignore)
+ continue;
+ if (max_depth < udev->upper_level)
+ max_depth = udev->upper_level;
+ }
+
+ return max_depth;
+}
+
+static u8 __netdev_lower_depth(struct net_device *dev)
{
struct net_device *ldev;
struct list_head *iter;
- int ret;
+ u8 max_depth = 0;
+ bool ignore;
for (iter = &dev->adj_list.lower,
- ldev = netdev_next_lower_dev_rcu(dev, &iter);
+ ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
ldev;
- ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
- /* first is the lower device itself */
- ret = fn(ldev, data);
- if (ret)
- return ret;
+ ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
+ if (ignore)
+ continue;
+ if (max_depth < ldev->lower_level)
+ max_depth = ldev->lower_level;
+ }
- /* then look at all of its lower devices */
- ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
- if (ret)
- return ret;
+ return max_depth;
+}
+
+static int __netdev_update_upper_level(struct net_device *dev, void *data)
+{ /* Set @dev->upper_level to __netdev_upper_depth(@dev) + 1. @data is not
+ * used; always returns 0.
+ */
+ dev->upper_level = __netdev_upper_depth(dev) + 1;
+ return 0;
+}
+
+static int __netdev_update_lower_level(struct net_device *dev, void *data)
+{ /* Set @dev->lower_level to __netdev_lower_depth(@dev) + 1. @data is not
+ * used; always returns 0.
+ */
+ dev->lower_level = __netdev_lower_depth(dev) + 1;
+ return 0;
+}
+
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
+
+ now = dev;
+ iter = &dev->adj_list.lower;
+
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev_rcu(now, &iter);
+ if (!ldev)
+ break;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
}
return 0;
@@@ -7361,7 -6952,6 +7361,7 @@@ static int __netdev_adjacent_dev_insert
adj->master = master;
adj->ref_nr = 1;
adj->private = private;
+ adj->ignore = false;
dev_hold(adj_dev);
pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on
%s\n",
@@@ -7512,17 -7102,14 +7512,17 @@@ static int __netdev_upper_dev_link(stru
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (netdev_has_upper_dev(upper_dev, dev))
+ if (__netdev_has_upper_dev(upper_dev, dev))
return -EBUSY;
+ if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
+ return -EMLINK;
+
if (!master) {
- if (netdev_has_upper_dev(dev, upper_dev))
+ if (__netdev_has_upper_dev(dev, upper_dev))
return -EEXIST;
} else {
- master_dev = netdev_master_upper_dev_get(dev);
+ master_dev = __netdev_master_upper_dev_get(dev);
if (master_dev)
return master_dev == upper_dev ? -EEXIST : -EBUSY;
}
@@@ -7544,13 -7131,6 +7544,13 @@@
if (ret)
goto rollback;
+ __netdev_update_upper_level(dev, NULL);
+ __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+ __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+ NULL);
+
return 0;
rollback:
@@@ -7633,96 -7213,9 +7633,96 @@@ void netdev_upper_dev_unlink(struct net
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
+
+ __netdev_update_upper_level(dev, NULL);
+ __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+ __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+ NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
+ struct net_device *lower_dev,
+ bool val)
+{ /* Set the ignore flag to @val on both adjacency entries of the link: the
+ * one looked up for @lower_dev in @upper_dev's lower list and the one
+ * looked up for @upper_dev in @lower_dev's upper list. A lookup that
+ * returns NULL is skipped.
+ */
+ struct netdev_adjacent *adj;
+
+ adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
+ if (adj)
+ adj->ignore = val;
+
+ adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
+ if (adj)
+ adj->ignore = val;
+}
+
+static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
+ struct net_device *lower_dev)
+{ /* Mark the @upper_dev/@lower_dev adjacency as ignored (ignore = true). */
+ __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
+}
+
+static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
+ struct net_device *lower_dev)
+{ /* Clear the ignore mark on the @upper_dev/@lower_dev adjacency. */
+ __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
+}
+
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{ /* First step of replacing @old_dev by @new_dev in @dev's adjacency.
+ * Does nothing and returns 0 when @new_dev is NULL. Otherwise the
+ * @dev/@old_dev adjacency is marked ignored (if @old_dev is set and differs
+ * from @new_dev) and netdev_upper_dev_link(@new_dev, @dev, @extack) is
+ * attempted; on failure the ignore mark is cleared again and the error is
+ * returned.
+ */
+ int err;
+
+ if (!new_dev)
+ return 0;
+
+ if (old_dev && new_dev != old_dev)
+ netdev_adjacent_dev_disable(dev, old_dev);
+
+ err = netdev_upper_dev_link(new_dev, dev, extack);
+ if (err) {
+ if (old_dev && new_dev != old_dev)
+ netdev_adjacent_dev_enable(dev, old_dev); /* undo the disable above */
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_adjacent_change_prepare);
+
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev)
+{ /* Finish a change: no-op unless both @old_dev and @new_dev are set and
+ * differ. Clears the ignore mark on the @dev/@old_dev adjacency, then
+ * calls netdev_upper_dev_unlink(@old_dev, @dev).
+ */
+ if (!new_dev || !old_dev)
+ return;
+
+ if (new_dev == old_dev)
+ return;
+
+ netdev_adjacent_dev_enable(dev, old_dev);
+ netdev_upper_dev_unlink(old_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_commit);
+
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev)
+{ /* Back out a prepared change: no-op when @new_dev is NULL. Clears the
+ * ignore mark on the @dev/@old_dev adjacency (if @old_dev is set and
+ * differs from @new_dev), then calls
+ * netdev_upper_dev_unlink(@new_dev, @dev).
+ */
+ if (!new_dev)
+ return;
+
+ if (old_dev && new_dev != old_dev)
+ netdev_adjacent_dev_enable(dev, old_dev);
+
+ netdev_upper_dev_unlink(new_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_abort);
+
/**
* netdev_bonding_info_change - Dispatch event about slave change
* @dev: device
@@@ -7836,6 -7329,25 +7836,6 @@@ void *netdev_lower_dev_get_private(stru
EXPORT_SYMBOL(netdev_lower_dev_get_private);
-int dev_get_nest_level(struct net_device *dev)
-{
- struct net_device *lower = NULL;
- struct list_head *iter;
- int max_nest = -1;
- int nest;
-
- ASSERT_RTNL();
-
- netdev_for_each_lower_dev(dev, lower, iter) {
- nest = dev_get_nest_level(lower);
- if (max_nest < nest)
- max_nest = nest;
- }
-
- return max_nest + 1;
-}
-EXPORT_SYMBOL(dev_get_nest_level);
-
/**
* netdev_lower_change - Dispatch event about lower device state change
* @lower_dev: device
@@@ -8642,8 -8154,7 +8642,8 @@@ int dev_change_xdp_fd(struct net_devic
return -EINVAL;
}
- if (prog->aux->id == prog_id) {
+ /* prog->aux->id may be 0 for orphaned device-bound progs */
+ if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
return 0;
}
@@@ -8753,9 -8264,6 +8753,9 @@@ static void rollback_registered_many(st
dev_uc_flush(dev);
dev_mc_flush(dev);
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@@ -9111,7 -8619,7 +9111,7 @@@ static void netdev_init_one_queue(struc
{
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
- netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+ lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
queue->xmit_lock_owner = -1;
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
@@@ -9158,43 -8666,6 +9158,43 @@@ void netif_tx_stop_all_queues(struct ne
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
+static void netdev_register_lockdep_key(struct net_device *dev)
+{ /* Register the four lockdep keys embedded in @dev. */
+ lockdep_register_key(&dev->qdisc_tx_busylock_key);
+ lockdep_register_key(&dev->qdisc_running_key);
+ lockdep_register_key(&dev->qdisc_xmit_lock_key);
+ lockdep_register_key(&dev->addr_list_lock_key);
+}
+
+static void netdev_unregister_lockdep_key(struct net_device *dev)
+{ /* Unregister the four lockdep keys embedded in @dev. */
+ lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
+ lockdep_unregister_key(&dev->qdisc_running_key);
+ lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+ lockdep_unregister_key(&dev->addr_list_lock_key);
+}
+
+void netdev_update_lockdep_key(struct net_device *dev)
+{ /* Unregister and re-register @dev's xmit-lock and addr-list lockdep keys,
+ * then point addr_list_lock and every TX queue's _xmit_lock at them again.
+ * The qdisc_tx_busylock and qdisc_running keys are left as they are.
+ */
+ struct netdev_queue *queue;
+ int i;
+
+ lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+ lockdep_unregister_key(&dev->addr_list_lock_key);
+
+ lockdep_register_key(&dev->qdisc_xmit_lock_key);
+ lockdep_register_key(&dev->addr_list_lock_key);
+
+ lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ queue = netdev_get_tx_queue(dev, i);
+
+ lockdep_set_class(&queue->_xmit_lock,
+ &dev->qdisc_xmit_lock_key);
+ }
+}
+EXPORT_SYMBOL(netdev_update_lockdep_key);
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@@ -9229,17 -8700,12 +9229,17 @@@ int register_netdevice(struct net_devic
BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock);
- netdev_set_addr_lockdep_class(dev);
+ lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
+ ret = -ENOMEM;
+ dev->name_node = netdev_name_node_head_alloc(dev);
+ if (!dev->name_node)
+ goto out;
+
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@@ -9361,8 -8827,6 +9361,8 @@@ out
return ret;
err_uninit:
+ if (dev->name_node)
+ netdev_name_node_free(dev->name_node);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
if (dev->priv_destructor)
@@@ -9746,12 -9210,8 +9746,12 @@@ struct net_device *alloc_netdev_mqs(in
dev_net_set(dev, &init_net);
+ netdev_register_lockdep_key(dev);
+
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
+ dev->upper_level = 1;
+ dev->lower_level = 1;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
@@@ -9832,8 -9292,6 +9832,8 @@@ void free_netdev(struct net_device *dev
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ netdev_unregister_lockdep_key(dev);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
@@@ -10002,7 -9460,7 +10002,7 @@@ int dev_change_net_namespace(struct net
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
- new_nsid = peernet2id_alloc(dev_net(dev), net);
+ new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
/* If there is an ifindex conflict assign a new one */
if (__dev_get_by_index(net, dev->ifindex))
new_ifindex = dev_new_index(net);
@@@ -10164,7 -9622,7 +10164,7 @@@ static struct hlist_head * __net_init n
static int __net_init netdev_init(struct net *net)
{
BUILD_BUG_ON(GRO_HASH_BUCKETS >
- 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+ 8 * sizeof_member(struct napi_struct, gro_bitmask));
if (net != &init_net)
INIT_LIST_HEAD(&net->dev_base_head);
@@@ -10177,8 -9635,6 +10177,8 @@@
if (net->dev_index_head == NULL)
goto err_idx;
+ RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
+
return 0;
err_idx:
diff --combined net/core/filter.c
index b0ed048585ba,284f60695ba9..222c6f63006e
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@@ -274,7 -274,7 +274,7 @@@ static u32 convert_skb_access(int skb_f
switch (skb_field) {
case SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, mark) != 4);
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
offsetof(struct sk_buff, mark));
@@@ -289,14 -289,14 +289,14 @@@
break;
case SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, queue_mapping) != 2);
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
offsetof(struct sk_buff, queue_mapping));
break;
case SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, vlan_tci) != 2);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
@@@ -322,7 -322,7 +322,7 @@@ static bool convert_bpf_extensions(stru
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, protocol) != 2);
/* A = *(u16 *) (CTX + offsetof(protocol)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
@@@ -338,8 -338,8 +338,8 @@@
case SKF_AD_OFF + SKF_AD_IFINDEX:
case SKF_AD_OFF + SKF_AD_HATYPE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+ BUILD_BUG_ON(sizeof_member(struct net_device, ifindex) != 4);
+ BUILD_BUG_ON(sizeof_member(struct net_device, type) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
BPF_REG_TMP, BPF_REG_CTX,
@@@ -361,7 -361,7 +361,7 @@@
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, hash) != 4);
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
offsetof(struct sk_buff, hash));
@@@ -385,7 -385,7 +385,7 @@@
break;
case SKF_AD_OFF + SKF_AD_VLAN_TPID:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, vlan_proto) != 2);
/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
@@@ -2245,7 -2245,7 +2245,7 @@@ BPF_CALL_4(bpf_msg_pull_data, struct sk
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!msg->sg.copy[i] && bytes_sg_total <= len)
+ if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@@ -2450,7 -2450,7 +2450,7 @@@ BPF_CALL_4(bpf_msg_push_data, struct sk
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- msg->sg.copy[new] = false;
+ __clear_bit(new, &msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@@ -3798,7 -3798,7 +3798,7 @@@ BPF_CALL_5(bpf_skb_event_output, struc
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(skb_size > skb->len))
+ if (unlikely(!skb || skb_size > skb->len))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
@@@ -3816,19 -3816,6 +3816,19 @@@ static const struct bpf_func_proto bpf_
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static int bpf_skb_output_btf_ids[5]; /* storage referenced by .btf_id below; one slot per argument -- TODO confirm */
+const struct bpf_func_proto bpf_skb_output_proto = { /* helper prototype backed by bpf_skb_event_output */
+ .func = bpf_skb_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID, /* first argument is typed as a BTF-id pointer */
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_skb_output_btf_ids,
+};
+
static unsigned short bpf_tunnel_key_af(u64 flags)
{
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
@@@ -4102,7 -4089,7 +4102,7 @@@ BPF_CALL_1(bpf_skb_cgroup_id, const str
return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- return cgrp->kn->id.id;
+ return cgroup_id(cgrp);
}
static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@@ -4127,7 -4114,7 +4127,7 @@@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id,
if (!ancestor)
return 0;
- return ancestor->kn->id.id;
+ return cgroup_id(ancestor);
}
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
@@@ -4265,14 -4252,12 +4265,14 @@@ BPF_CALL_5(bpf_setsockopt, struct bpf_s
case SO_RCVBUF:
val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
val = min_t(u32, val, sysctl_wmem_max);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+ WRITE_ONCE(sk->sk_sndbuf,
+ max_t(int, val * 2, SOCK_MIN_SNDBUF));
break;
case SO_MAX_PACING_RATE: /* 32bit version */
if (val != ~0U)
@@@ -4289,7 -4274,7 +4289,7 @@@
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
case SO_MARK:
if (sk->sk_mark != val) {
@@@ -5589,8 -5574,8 +5589,8 @@@ u32 bpf_tcp_sock_convert_ctx_access(enu
#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
- FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ BUILD_BUG_ON(sizeof_member(struct tcp_sock, FIELD) > \
+ sizeof_member(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct tcp_sock, FIELD)); \
@@@ -5598,9 -5583,9 +5598,9 @@@
#define BPF_INET_SOCK_GET_COMMON(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \
+ BUILD_BUG_ON(sizeof_member(struct inet_connection_sock, \
FIELD) > \
- FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ sizeof_member(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct inet_connection_sock, \
FIELD), \
@@@ -5615,7 -5600,7 +5615,7 @@@
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ BUILD_BUG_ON(sizeof_member(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
@@@ -5780,8 -5765,8 +5780,8 @@@ u32 bpf_xdp_sock_convert_ctx_access(enu
#define BPF_XDP_SOCK_GET(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
- FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
+ BUILD_BUG_ON(sizeof_member(struct xdp_sock, FIELD) > \
+ sizeof_member(struct bpf_xdp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct xdp_sock, FIELD)); \
@@@ -7344,7 -7329,7 +7344,7 @@@ static u32 bpf_convert_ctx_access(enum
case offsetof(struct __sk_buff, cb[0]) ...
offsetofend(struct __sk_buff, cb[4]) - 1:
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ BUILD_BUG_ON(sizeof_member(struct qdisc_skb_cb, data) < 20);
BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
offsetof(struct qdisc_skb_cb, data)) %
sizeof(__u64));
@@@ -7363,7 -7348,7 +7363,7 @@@
break;
case offsetof(struct __sk_buff, tc_classid):
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
+ BUILD_BUG_ON(sizeof_member(struct qdisc_skb_cb, tc_classid) != 2);
off = si->off;
off -= offsetof(struct __sk_buff, tc_classid);
@@@ -7434,7 -7419,7 +7434,7 @@@
#endif
break;
case offsetof(struct __sk_buff, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@@ -7445,7 -7430,7 +7445,7 @@@
2, target_size));
break;
case offsetof(struct __sk_buff, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@@ -7456,7 -7441,7 +7456,7 @@@
4, target_size));
break;
case offsetof(struct __sk_buff, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
@@@ -7470,7 -7455,7 +7470,7 @@@
case offsetof(struct __sk_buff, remote_ip6[0]) ...
offsetof(struct __sk_buff, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@@ -7490,7 -7475,7 +7490,7 @@@
case offsetof(struct __sk_buff, local_ip6[0]) ...
offsetof(struct __sk_buff, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@@ -7509,7 -7494,7 +7509,7 @@@
break;
case offsetof(struct __sk_buff, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@@ -7524,7 -7509,7 +7524,7 @@@
break;
case offsetof(struct __sk_buff, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@@ -7535,7 -7520,7 +7535,7 @@@
break;
case offsetof(struct __sk_buff, tstamp):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+ BUILD_BUG_ON(sizeof_member(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_DW,
@@@ -7573,7 -7558,7 +7573,7 @@@
target_size));
break;
case offsetof(struct __sk_buff, wire_len):
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+ BUILD_BUG_ON(sizeof_member(struct qdisc_skb_cb, pkt_len) != 4);
off = si->off;
off -= offsetof(struct __sk_buff, wire_len);
@@@ -7603,7 -7588,7 +7603,7 @@@ u32 bpf_sock_convert_ctx_access(enum bp
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@@ -7614,7 -7599,7 +7614,7 @@@
break;
case offsetof(struct bpf_sock, mark):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@@ -7625,7 -7610,7 +7625,7 @@@
break;
case offsetof(struct bpf_sock, priority):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@@ -7641,7 -7626,7 +7641,7 @@@
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_family,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_family),
target_size));
break;
@@@ -7668,7 -7653,7 +7668,7 @@@
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_rcv_saddr,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_rcv_saddr),
target_size));
break;
@@@ -7677,7 -7662,7 +7677,7 @@@
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_daddr,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_daddr),
target_size));
break;
@@@ -7691,7 -7676,7 +7691,7 @@@
bpf_target_off(
struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0],
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]),
target_size) + off);
#else
@@@ -7708,7 -7693,7 +7708,7 @@@
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_v6_daddr.s6_addr32[0],
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_v6_daddr.s6_addr32[0]),
target_size) + off);
#else
@@@ -7722,7 -7707,7 +7722,7 @@@
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_num,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_num),
target_size));
break;
@@@ -7732,7 -7717,7 +7732,7 @@@
BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_dport,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_dport),
target_size));
break;
@@@ -7742,7 -7727,7 +7742,7 @@@
BPF_FIELD_SIZEOF(struct sock_common, skc_state),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_state,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_member(struct sock_common,
skc_state),
target_size));
break;
@@@ -7837,7 -7822,7 +7837,7 @@@ static u32 xdp_convert_ctx_access(enum
si->src_reg, offsetof(S, F)); \
*insn++ = BPF_LDX_MEM( \
SIZE, si->dst_reg, si->dst_reg, \
- bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ bpf_target_off(NS, NF, sizeof_member(NS, NF), \
target_size) \
+ OFF); \
} while (0)
@@@ -7868,7 -7853,7 +7868,7 @@@
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
- bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ bpf_target_off(NS, NF, sizeof_member(NS, NF), \
target_size) \
+ OFF); \
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
@@@ -7930,8 -7915,8 +7930,8 @@@ static u32 sock_addr_convert_ctx_access
*/
BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
offsetof(struct sockaddr_in6, sin6_port));
- BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
- FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(sizeof_member(struct sockaddr_in, sin_port) !=
+ sizeof_member(struct sockaddr_in6, sin6_port));
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
struct sockaddr_in6, uaddr,
sin6_port, tmp_reg);
@@@ -7997,8 -7982,8 +7997,8 @@@ static u32 sock_ops_convert_ctx_access(
/* Helper macro for adding read access to tcp_sock or sock fields. */
#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ BUILD_BUG_ON(sizeof_member(OBJ, OBJ_FIELD) > \
+ sizeof_member(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
@@@ -8031,8 -8016,8 +8031,8 @@@
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
int reg = BPF_REG_9; \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ BUILD_BUG_ON(sizeof_member(OBJ, OBJ_FIELD) > \
+ sizeof_member(struct bpf_sock_ops, BPF_FIELD)); \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
if (si->dst_reg == reg || si->src_reg == reg) \
@@@ -8073,12 -8058,12 +8073,12 @@@
switch (si->off) {
case offsetof(struct bpf_sock_ops, op) ...
offsetof(struct bpf_sock_ops, replylong[3]):
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+ BUILD_BUG_ON(sizeof_member(struct bpf_sock_ops, op) !=
+ sizeof_member(struct bpf_sock_ops_kern, op));
+ BUILD_BUG_ON(sizeof_member(struct bpf_sock_ops, reply) !=
+ sizeof_member(struct bpf_sock_ops_kern, reply));
+ BUILD_BUG_ON(sizeof_member(struct bpf_sock_ops, replylong) !=
+ sizeof_member(struct bpf_sock_ops_kern, replylong));
off = si->off;
off -= offsetof(struct bpf_sock_ops, op);
off += offsetof(struct bpf_sock_ops_kern, op);
@@@ -8091,7 -8076,7 +8091,7 @@@
break;
case offsetof(struct bpf_sock_ops, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@@ -8102,7 -8087,7 +8102,7 @@@
break;
case offsetof(struct bpf_sock_ops, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@@ -8113,7 -8098,7 +8113,7 @@@
break;
case offsetof(struct bpf_sock_ops, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
@@@ -8128,7 -8113,7 +8128,7 @@@
case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
offsetof(struct bpf_sock_ops, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@@ -8149,7 -8134,7 +8149,7 @@@
case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
offsetof(struct bpf_sock_ops, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@@ -8168,7 -8153,7 +8168,7 @@@
break;
case offsetof(struct bpf_sock_ops, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@@ -8182,7 -8167,7 +8182,7 @@@
break;
case offsetof(struct bpf_sock_ops, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@@ -8202,7 -8187,7 +8202,7 @@@
break;
case offsetof(struct bpf_sock_ops, state):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_state) != 1);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@@ -8213,7 -8198,7 +8213,7 @@@
break;
case offsetof(struct bpf_sock_ops, rtt_min):
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ BUILD_BUG_ON(sizeof_member(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
@@@ -8224,7 -8209,7 +8224,7 @@@
offsetof(struct bpf_sock_ops_kern, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct tcp_sock, rtt_min) +
- FIELD_SIZEOF(struct minmax_sample, t));
+ sizeof_member(struct minmax_sample, t));
break;
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
@@@ -8366,7 -8351,7 +8366,7 @@@ static u32 sk_msg_convert_ctx_access(en
offsetof(struct sk_msg, data_end));
break;
case offsetof(struct sk_msg_md, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@@ -8377,7 -8362,7 +8377,7 @@@
break;
case offsetof(struct sk_msg_md, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@@ -8388,7 -8373,7 +8388,7 @@@
break;
case offsetof(struct sk_msg_md, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
@@@ -8403,7 -8388,7 +8403,7 @@@
case offsetof(struct sk_msg_md, remote_ip6[0]) ...
offsetof(struct sk_msg_md, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@@ -8424,7 -8409,7 +8424,7 @@@
case offsetof(struct sk_msg_md, local_ip6[0]) ...
offsetof(struct sk_msg_md, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_member(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@@ -8443,7 -8428,7 +8443,7 @@@
break;
case offsetof(struct sk_msg_md, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@@ -8457,7 -8442,7 +8457,7 @@@
break;
case offsetof(struct sk_msg_md, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_member(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@@ -8684,6 -8669,16 +8684,6 @@@ out
}
#ifdef CONFIG_INET
-struct sk_reuseport_kern {
- struct sk_buff *skb;
- struct sock *sk;
- struct sock *selected_sk;
- void *data_end;
- u32 hash;
- u32 reuseport_id;
- bool bind_inany;
-};
-
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
struct sock_reuseport *reuse,
struct sock *sk, struct sk_buff *skb,
@@@ -8847,7 -8842,7 +8847,7 @@@ sk_reuseport_is_valid_access(int off, i
/* Fields that allow narrowing */
case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
- if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+ if (size < sizeof_member(struct sk_buff, protocol))
return false;
/* fall through */
case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
@@@ -8865,7 -8860,7 +8865,7 @@@
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
si->dst_reg, si->src_reg, \
bpf_target_off(struct sk_reuseport_kern, F, \
- FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+ sizeof_member(struct sk_reuseport_kern, F), \
target_size)); \
})
diff --combined net/core/flow_dissector.c
index ca871657a4c4,3f5f61bc2bfa..ea5ef1b80771
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@@ -114,50 -114,19 +114,50 @@@ int skb_flow_dissector_bpf_prog_attach(
{
struct bpf_prog *attached;
struct net *net;
+ int ret = 0;
net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);
+
+ if (net == &init_net) {
+ /* BPF flow dissector in the root namespace overrides
+ * any per-net-namespace one. When attaching to root,
+ * make sure we don't have any BPF program attached
+ * to the non-root namespaces.
+ */
+ struct net *ns;
+
+ for_each_net(ns) {
+ if (ns == &init_net)
+ continue;
+ if (rcu_access_pointer(ns->flow_dissector_prog)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ } else {
+ /* Make sure root flow dissector is not attached
+ * when attaching to the non-root namespace.
+ */
+ if (rcu_access_pointer(init_net.flow_dissector_prog)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
- if (attached) {
- /* Only one BPF program can be attached at a time */
- mutex_unlock(&flow_dissector_mutex);
- return -EEXIST;
+ if (attached == prog) {
+ /* The same program cannot be attached twice */
+ ret = -EINVAL;
+ goto out;
}
rcu_assign_pointer(net->flow_dissector_prog, prog);
+ if (attached)
+ bpf_prog_put(attached);
+out:
mutex_unlock(&flow_dissector_mutex);
- return 0;
+ return ret;
}
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
@@@ -178,6 -147,27 +178,6 @@@
mutex_unlock(&flow_dissector_mutex);
return 0;
}
-/**
- * skb_flow_get_be16 - extract be16 entity
- * @skb: sk_buff to extract from
- * @poff: offset to extract at
- * @data: raw buffer pointer to the packet
- * @hlen: packet header length
- *
- * The function will try to retrieve a be32 entity at
- * offset poff
- */
-static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
- void *data, int hlen)
-{
- __be16 *u, _u;
-
- u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
- if (u)
- return *u;
-
- return 0;
-}
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@@ -213,72 -203,6 +213,72 @@@ __be32 __skb_flow_get_ports(const struc
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static bool icmp_has_id(u8 type)
+{
+ switch (type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ case ICMP_TIMESTAMP:
+ case ICMP_TIMESTAMPREPLY:
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @thoff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+ struct flow_dissector_key_icmp *key_icmp,
+ void *data, int thoff, int hlen)
+{
+ struct icmphdr *ih, _ih;
+
+ ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+ if (!ih)
+ return;
+
+ key_icmp->type = ih->type;
+ key_icmp->code = ih->code;
+
+ /* As we use 0 to signal that the Id field is not present,
+ * avoid confusion with packets without such field
+ */
+ if (icmp_has_id(ih->type))
+ key_icmp->id = ih->un.echo.id ? : 1;
+ else
+ key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
+ void *data, int thoff, int hlen)
+{
+ struct flow_dissector_key_icmp *key_icmp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+ return;
+
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+
+ skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container)
@@@ -599,8 -523,8 +599,8 @@@ __skb_flow_dissect_gre(const struct sk_
offset += sizeof(struct gre_base_hdr);
if (hdr->flags & GRE_CSUM)
- offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
- FIELD_SIZEOF(struct gre_full_hdr, reserved1);
+ offset += sizeof_member(struct gre_full_hdr, csum) +
+ sizeof_member(struct gre_full_hdr, reserved1);
if (hdr->flags & GRE_KEY) {
const __be32 *keyid;
@@@ -622,11 -546,11 +622,11 @@@
else
key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
}
- offset += FIELD_SIZEOF(struct gre_full_hdr, key);
+ offset += sizeof_member(struct gre_full_hdr, key);
}
if (hdr->flags & GRE_SEQ)
- offset += FIELD_SIZEOF(struct pptp_gre_header, seq);
+ offset += sizeof_member(struct pptp_gre_header, seq);
if (gre_ver == 0) {
if (*p_proto == htons(ETH_P_TEB)) {
@@@ -653,7 -577,7 +653,7 @@@
u8 *ppp_hdr;
if (hdr->flags & GRE_ACK)
- offset += FIELD_SIZEOF(struct pptp_gre_header, ack);
+ offset += sizeof_member(struct pptp_gre_header, ack);
ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
sizeof(_ppp_hdr),
@@@ -929,6 -853,7 +929,6 @@@ bool __skb_flow_dissect(const struct ne
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
- struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct bpf_prog *attached = NULL;
@@@ -985,10 -910,7 +985,10 @@@
WARN_ON_ONCE(!net);
if (net) {
rcu_read_lock();
- attached = rcu_dereference(net->flow_dissector_prog);
+ attached = rcu_dereference(init_net.flow_dissector_prog);
+
+ if (!attached)
+ attached = rcu_dereference(net->flow_dissector_prog);
if (attached) {
struct bpf_flow_keys flow_keys;
@@@ -1373,12 -1295,6 +1373,12 @@@ ip_proto_again
data, nhoff, hlen);
break;
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ __skb_flow_dissect_icmp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+
default:
break;
}
@@@ -1392,6 -1308,14 +1392,6 @@@
data, hlen);
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP)) {
- key_icmp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP,
- target_container);
- key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
- }
-
/* Process result of IP proto processing */
switch (fdret) {
case FLOW_DISSECT_RET_PROTO_AGAIN:
@@@ -1426,23 -1350,32 +1426,23 @@@ out_bad
}
EXPORT_SYMBOL(__skb_flow_dissect);
-static u32 hashrnd __read_mostly;
+static siphash_key_t hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
{
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
- u32 keyval)
-{
- return jhash2(words, length, keyval);
-}
-
-static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
+static const void *flow_keys_hash_start(const struct flow_keys *flow)
{
- const void *p = flow;
-
- BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
- return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
+ BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT);
+ return &flow->FLOW_KEYS_HASH_START_FIELD;
}
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
+
BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
- BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
- sizeof(*flow) - sizeof(flow->addrs));
switch (flow->control.addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
@@@ -1455,7 -1388,7 +1455,7 @@@
diff -= sizeof(flow->addrs.tipckey);
break;
}
- return (sizeof(*flow) - diff) / sizeof(u32);
+ return sizeof(*flow) - diff;
}
__be32 flow_get_u32_src(const struct flow_keys *flow)
@@@ -1488,9 -1421,6 +1488,9 @@@ __be32 flow_get_u32_dst(const struct fl
}
EXPORT_SYMBOL(flow_get_u32_dst);
+/* Sort the source and destination IP (and the ports if the IP are the same),
+ * to have consistent hash within the two directions
+ */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
int addr_diff, i;
@@@ -1524,15 -1454,14 +1524,15 @@@
}
}
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys,
+ const siphash_key_t *keyval)
{
u32 hash;
__flow_hash_consistentify(keys);
- hash = __flow_hash_words(flow_keys_hash_start(keys),
- flow_keys_hash_length(keys), keyval);
+ hash = siphash(flow_keys_hash_start(keys),
+ flow_keys_hash_length(keys), keyval);
if (!hash)
hash = 1;
@@@ -1542,13 -1471,12 +1542,13 @@@
u32 flow_hash_from_keys(struct flow_keys *keys)
{
__flow_hash_secret_init();
- return __flow_hash_from_keys(keys, hashrnd);
+ return __flow_hash_from_keys(keys, &hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
- struct flow_keys *keys, u32 keyval)
+ struct flow_keys *keys,
+ const siphash_key_t *keyval)
{
skb_flow_dissect_flow_keys(skb, keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
@@@ -1596,7 -1524,7 +1596,7 @@@ u32 __skb_get_hash_symmetric(const stru
&keys, NULL, 0, 0, 0,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
- return __flow_hash_from_keys(&keys, hashrnd);
+ return __flow_hash_from_keys(&keys, &hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
@@@ -1616,14 -1544,13 +1616,14 @@@ void __skb_get_hash(struct sk_buff *skb
__flow_hash_secret_init();
- hash = ___skb_get_hash(skb, &keys, hashrnd);
+ hash = ___skb_get_hash(skb, &keys, &hashrnd);
__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
}
EXPORT_SYMBOL(__skb_get_hash);
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+ const siphash_key_t *perturb)
{
struct flow_keys keys;
diff --combined net/core/skbuff.c
index 867e61df00db,6054465875d8..31b8539c72c1
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@@ -4148,7 -4148,7 +4148,7 @@@ void __init skb_init(void
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
offsetof(struct sk_buff, cb),
- sizeof_field(struct sk_buff, cb),
+ sizeof_member(struct sk_buff, cb),
NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
sizeof(struct sk_buff_fclones),
@@@ -4415,7 -4415,7 +4415,7 @@@ static void skb_set_err_queue(struct sk
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf)
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
return -ENOMEM;
skb_orphan(skb);
@@@ -5477,14 -5477,12 +5477,14 @@@ static void skb_mod_eth_type(struct sk_
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
+ * @mac_len: length of the MAC header
*
* Expects skb->data at mac header.
*
* Returns 0 on success, -errno otherwise.
*/
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+ int mac_len)
{
struct mpls_shim_hdr *lse;
int err;
@@@ -5501,15 -5499,15 +5501,15 @@@
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, skb->mac_len);
+ skb_set_inner_network_header(skb, mac_len);
skb_set_inner_protocol(skb, skb->protocol);
}
skb_push(skb, MPLS_HLEN);
memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
+ mac_len);
skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
+ skb_set_network_header(skb, mac_len);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
@@@ -5528,30 -5526,29 +5528,30 @@@ EXPORT_SYMBOL_GPL(skb_mpls_push)
*
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
+ * @mac_len: length of the MAC header
*
* Expects skb->data at mac header.
*
* Returns 0 on success, -errno otherwise.
*/
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto)
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len)
{
int err;
if (unlikely(!eth_p_mpls(skb->protocol)))
- return -EINVAL;
+ return 0;
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+ err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
+ mac_len);
__skb_pull(skb, MPLS_HLEN);
skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
+ skb_set_network_header(skb, mac_len);
if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
struct ethhdr *hdr;
diff --combined net/core/xdp.c
index e334fad0a6b8,621d6148b07a..dca31c721c24
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@@ -36,7 -36,7 +36,7 @@@ static u32 xdp_mem_id_hashfn(const voi
const u32 *k = data;
const u32 key = *k;
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+ BUILD_BUG_ON(sizeof_member(struct xdp_mem_allocator, mem.id)
!= sizeof(u32));
/* Use cyclic increasing ID as direct hash key */
@@@ -56,7 -56,7 +56,7 @@@ static const struct rhashtable_params m
.nelem_hint = 64,
.head_offset = offsetof(struct xdp_mem_allocator, node),
.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
- .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+ .key_len = sizeof_member(struct xdp_mem_allocator, mem.id),
.max_size = MEM_ID_MAX,
.min_size = 8,
.automatic_shrinking = true,
@@@ -70,63 -70,77 +70,63 @@@ static void __xdp_mem_allocator_rcu_fre
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
- /* Allocator have indicated safe to remove before this is called */
- if (xa->mem.type == MEM_TYPE_PAGE_POOL)
- page_pool_free(xa->page_pool);
-
/* Allow this ID to be reused */
ida_simple_remove(&mem_id_pool, xa->mem.id);
- /* Poison memory */
- xa->mem.id = 0xFFFF;
- xa->mem.type = 0xF0F0;
- xa->allocator = (void *)0xDEAD9001;
-
kfree(xa);
}
-static bool __mem_id_disconnect(int id, bool force)
+static void mem_xa_remove(struct xdp_mem_allocator *xa)
{
- struct xdp_mem_allocator *xa;
- bool safe_to_remove = true;
+ trace_mem_disconnect(xa);
mutex_lock(&mem_id_lock);
- xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
- if (!xa) {
- mutex_unlock(&mem_id_lock);
- WARN(1, "Request remove non-existing id(%d), driver bug?", id);
- return true;
- }
- xa->disconnect_cnt++;
-
- /* Detects in-flight packet-pages for page_pool */
- if (xa->mem.type == MEM_TYPE_PAGE_POOL)
- safe_to_remove = page_pool_request_shutdown(xa->page_pool);
-
- trace_mem_disconnect(xa, safe_to_remove, force);
-
- if ((safe_to_remove || force) &&
- !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+ if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
mutex_unlock(&mem_id_lock);
- return (safe_to_remove|force);
}
-#define DEFER_TIME (msecs_to_jiffies(1000))
-#define DEFER_WARN_INTERVAL (30 * HZ)
-#define DEFER_MAX_RETRIES 120
+static void mem_allocator_disconnect(void *allocator)
+{
+ struct xdp_mem_allocator *xa;
+ struct rhashtable_iter iter;
+
+ rhashtable_walk_enter(mem_id_ht, &iter);
+ do {
+ rhashtable_walk_start(&iter);
+
+ while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
+ if (xa->allocator == allocator)
+ mem_xa_remove(xa);
+ }
+
+ rhashtable_walk_stop(&iter);
-static void mem_id_disconnect_defer_retry(struct work_struct *wq)
+ } while (xa == ERR_PTR(-EAGAIN));
+ rhashtable_walk_exit(&iter);
+}
+
+static void mem_id_disconnect(int id)
{
- struct delayed_work *dwq = to_delayed_work(wq);
- struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
- bool force = false;
+ struct xdp_mem_allocator *xa;
- if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
- force = true;
+ mutex_lock(&mem_id_lock);
- if (__mem_id_disconnect(xa->mem.id, force))
+ xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
+ if (!xa) {
+ mutex_unlock(&mem_id_lock);
+ WARN(1, "Request remove non-existing id(%d), driver bug?", id);
return;
+ }
- /* Periodic warning */
- if (time_after_eq(jiffies, xa->defer_warn)) {
- int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
+ trace_mem_disconnect(xa);
- pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
- __func__, xa->mem.id, xa->disconnect_cnt, sec);
- xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
- }
+ if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+ call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
- /* Still not ready to be disconnected, retry later */
- schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
+ mutex_unlock(&mem_id_lock);
}
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
@@@ -139,21 -153,38 +139,21 @@@
return;
}
- if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
- xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
- return;
- }
-
if (id == 0)
return;
- if (__mem_id_disconnect(id, false))
- return;
-
- /* Could not disconnect, defer new disconnect attempt to later */
- mutex_lock(&mem_id_lock);
+ if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY)
+ return mem_id_disconnect(id);
- xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
- if (!xa) {
- mutex_unlock(&mem_id_lock);
- return;
+ if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
+ rcu_read_lock();
+ xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+ page_pool_destroy(xa->page_pool);
+ rcu_read_unlock();
}
- xa->defer_start = jiffies;
- xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
-
- INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
- mutex_unlock(&mem_id_lock);
- schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
-/* This unregister operation will also cleanup and destroy the
- * allocator. The page_pool_free() operation is first called when it's
- * safe to remove, possibly deferred to a workqueue.
- */
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
/* Simplify driver cleanup code paths, allow unreg "unused" */
@@@ -340,7 -371,7 +340,7 @@@ int xdp_rxq_info_reg_mem_model(struct x
}
if (type == MEM_TYPE_PAGE_POOL)
- page_pool_get(xdp_alloc->page_pool);
+ page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
mutex_unlock(&mem_id_lock);
@@@ -355,7 -386,7 +355,7 @@@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_
/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
- * while still leveraging this protection. The @napi_direct boolian
+ * while still leveraging this protection. The @napi_direct boolean
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
@@@ -371,8 -402,15 +371,8 @@@ static void __xdp_return(void *data, st
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
- if (likely(xa)) {
- napi_direct &= !xdp_return_frame_no_direct();
- page_pool_put_page(xa->page_pool, page, napi_direct);
- } else {
- /* Hopefully stack show who to blame for late return */
- WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
- trace_mem_return_failed(mem, page);
- put_page(page);
- }
+ napi_direct &= !xdp_return_frame_no_direct();
+ page_pool_put_page(xa->page_pool, page, napi_direct);
rcu_read_unlock();
break;
case MEM_TYPE_PAGE_SHARED:
diff --combined net/dccp/proto.c
index a52e8ba1ced0,7175a33090b5..4d7396ed8693
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@@ -944,7 -944,7 +944,7 @@@ int inet_dccp_listen(struct socket *soc
if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
goto out;
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
@@@ -1132,7 -1132,7 +1132,7 @@@ static int __init dccp_init(void
int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_member(struct sk_buff, cb));
rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
if (rc)
goto out_fail;
diff --combined net/ipv4/ip_gre.c
index 572b6307a2df,9c9716f7ccdb..2c8abf9dbb34
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@@ -340,8 -340,6 +340,8 @@@ static int __ipgre_rcv(struct sk_buff *
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
+ const struct iphdr *tnl_params;
+
if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
raw_proto, false) < 0)
goto drop;
@@@ -350,9 -348,7 +350,9 @@@
skb_pop_mac_header(skb);
else
skb_reset_mac_header(skb);
- if (tunnel->collect_md) {
+
+ tnl_params = &tunnel->parms.iph;
+ if (tunnel->collect_md || tnl_params->daddr == 0) {
__be16 flags;
__be64 tun_id;
@@@ -513,9 -509,9 +513,9 @@@ static void erspan_fb_xmit(struct sk_bu
key = &tun_info->key;
if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
goto err_free_skb;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
+ if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
+ md = ip_tunnel_info_opts(tun_info);
/* ERSPAN has fixed 8 byte GRE header */
version = md->version;
@@@ -1464,8 -1460,8 +1464,8 @@@ static const struct nla_policy ipgre_po
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_member(struct iphdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_member(struct iphdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
diff --combined net/ipv4/tcp.c
index 8a39ee794891,7d23503a1d2d..ef7c0693ad79
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@@ -326,7 -326,7 +326,7 @@@ void tcp_enter_memory_pressure(struct s
{
unsigned long val;
- if (tcp_memory_pressure)
+ if (READ_ONCE(tcp_memory_pressure))
return;
val = jiffies;
@@@ -341,7 -341,7 +341,7 @@@ void tcp_leave_memory_pressure(struct s
{
unsigned long val;
- if (!tcp_memory_pressure)
+ if (!READ_ONCE(tcp_memory_pressure))
return;
val = xchg(&tcp_memory_pressure, 0);
if (val)
@@@ -450,8 -450,8 +450,8 @@@ void tcp_init_sock(struct sock *sk
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+ WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+ WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
sk_sockets_allocated_inc(sk);
sk->sk_route_forced_caps = NETIF_F_GSO;
@@@ -477,7 -477,7 +477,7 @@@ static void tcp_tx_timestamp(struct soc
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
- return (tp->rcv_nxt - tp->copied_seq >= target) ||
+ return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
(sk->sk_prot->stream_memory_read ?
sk->sk_prot->stream_memory_read(sk) : false);
}
@@@ -543,10 -543,10 +543,10 @@@ __poll_t tcp_poll(struct file *file, st
/* Connected or passive Fast Open socket? */
if (state != TCP_SYN_SENT &&
- (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+ (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
- if (tp->urg_seq == tp->copied_seq &&
+ if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
!sock_flag(sk, SOCK_URGINLINE) &&
tp->urg_data)
target++;
@@@ -584,7 -584,7 +584,7 @@@
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR;
return mask;
@@@ -607,8 -607,7 +607,8 @@@ int tcp_ioctl(struct sock *sk, int cmd
unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
- answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+ answ = tp->urg_data &&
+ READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
break;
case SIOCOUTQ:
if (sk->sk_state == TCP_LISTEN)
@@@ -617,7 -616,7 +617,7 @@@
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_una;
+ answ = READ_ONCE(tp->write_seq) - tp->snd_una;
break;
case SIOCOUTQNSD:
if (sk->sk_state == TCP_LISTEN)
@@@ -626,8 -625,7 +626,8 @@@
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_nxt;
+ answ = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
break;
default:
return -ENOIOCTLCMD;
@@@ -659,7 -657,7 +659,7 @@@ static void skb_entail(struct sock *sk
tcb->sacked = 0;
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
@@@ -1034,10 -1032,10 +1034,10 @@@ new_segment
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
skb->ip_summed = CHECKSUM_PARTIAL;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@@ -1364,7 -1362,7 +1364,7 @@@ new_segment
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@@ -1670,9 -1668,9 +1670,9 @@@ int tcp_read_sock(struct sock *sk, read
sk_eat_skb(sk, skb);
if (!desc->count)
break;
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
}
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
@@@ -1701,7 -1699,7 +1701,7 @@@ int tcp_set_rcvlowat(struct sock *sk, i
else
cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
val = min(val, cap);
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
/* Check if we need to signal EPOLLIN right now */
tcp_data_ready(sk);
@@@ -1711,7 -1709,7 +1711,7 @@@
val <<= 1;
if (val > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = val;
+ WRITE_ONCE(sk->sk_rcvbuf, val);
tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
}
return 0;
@@@ -1741,8 -1739,8 +1741,8 @@@ static int tcp_zerocopy_receive(struct
struct tcp_zerocopy_receive *zc)
{
unsigned long address = (unsigned long)zc->address;
+ u32 length = 0, seq, offset, zap_len;
const skb_frag_t *frags = NULL;
- u32 length = 0, seq, offset;
struct vm_area_struct *vma;
struct sk_buff *skb = NULL;
struct tcp_sock *tp;
@@@ -1769,12 -1767,12 +1769,12 @@@
seq = tp->copied_seq;
inq = tcp_inq(sk);
zc->length = min_t(u32, zc->length, inq);
- zc->length &= ~(PAGE_SIZE - 1);
- if (zc->length) {
- zap_page_range(vma, address, zc->length);
+ zap_len = zc->length & ~(PAGE_SIZE - 1);
+ if (zap_len) {
+ zap_page_range(vma, address, zap_len);
zc->recv_skip_hint = 0;
} else {
- zc->recv_skip_hint = inq;
+ zc->recv_skip_hint = zc->length;
}
ret = 0;
while (length + PAGE_SIZE <= zc->length) {
@@@ -1821,7 -1819,7 +1821,7 @@@
out:
up_read(¤t->mm->mmap_sem);
if (length) {
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
@@@ -1864,33 -1862,29 +1864,33 @@@ static void tcp_recv_timestamp(struct m
if (sock_flag(sk, SOCK_RCVTSTAMP)) {
if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
if (new_tstamp) {
- struct __kernel_timespec kts = {tss->ts[0].tv_sec, tss->ts[0].tv_nsec};
-
+ struct __kernel_timespec kts = {
+ .tv_sec = tss->ts[0].tv_sec,
+ .tv_nsec = tss->ts[0].tv_nsec,
+ };
put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
sizeof(kts), &kts);
} else {
- struct timespec ts_old = timespec64_to_timespec(tss->ts[0]);
-
+ struct __kernel_old_timespec ts_old = {
+ .tv_sec = tss->ts[0].tv_sec,
+ .tv_nsec = tss->ts[0].tv_nsec,
+ };
put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
sizeof(ts_old), &ts_old);
}
} else {
if (new_tstamp) {
- struct __kernel_sock_timeval stv;
-
- stv.tv_sec = tss->ts[0].tv_sec;
- stv.tv_usec = tss->ts[0].tv_nsec / 1000;
+ struct __kernel_sock_timeval stv = {
+ .tv_sec = tss->ts[0].tv_sec,
+ .tv_usec = tss->ts[0].tv_nsec / 1000,
+ };
put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
sizeof(stv), &stv);
} else {
- struct __kernel_old_timeval tv;
-
- tv.tv_sec = tss->ts[0].tv_sec;
- tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+ struct __kernel_old_timeval tv = {
+ .tv_sec = tss->ts[0].tv_sec,
+ .tv_usec = tss->ts[0].tv_nsec / 1000,
+ };
put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
sizeof(tv), &tv);
}
@@@ -1962,12 -1956,13 +1962,12 @@@ int tcp_recvmsg(struct sock *sk, struc
struct sk_buff *skb, *last;
u32 urg_hole = 0;
struct scm_timestamping_internal tss;
- bool has_tss = false;
- bool has_cmsg;
+ int cmsg_flags;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
- if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue)
&&
+ if (sk_can_busy_loop(sk) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
sk_busy_loop(sk, nonblock);
@@@ -1977,7 -1972,7 +1977,7 @@@
if (sk->sk_state == TCP_LISTEN)
goto out;
- has_cmsg = tp->recvmsg_inq;
+ cmsg_flags = tp->recvmsg_inq ? 1 : 0;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@@ -2050,7 -2045,7 +2050,7 @@@
/* Well, if we have backlog, try to process it now yet. */
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
@@@ -2122,7 -2117,7 +2122,7 @@@ found_ok_skb
if (urg_offset < used) {
if (!urg_offset) {
if (!sock_flag(sk, SOCK_URGINLINE)) {
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
urg_hole++;
offset++;
used--;
@@@ -2144,7 -2139,7 +2144,7 @@@
}
}
- *seq += used;
+ WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
@@@ -2160,7 -2155,8 +2160,7 @@@ skip_copy
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss);
- has_tss = true;
- has_cmsg = true;
+ cmsg_flags |= 2;
}
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
@@@ -2170,7 -2166,7 +2170,7 @@@
found_fin_ok:
/* Process the FIN. */
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
break;
@@@ -2185,10 -2181,10 +2185,10 @@@
release_sock(sk);
- if (has_cmsg) {
- if (has_tss)
+ if (cmsg_flags) {
+ if (cmsg_flags & 2)
tcp_recv_timestamp(msg, sk, &tss);
- if (tp->recvmsg_inq) {
+ if (cmsg_flags & 1) {
inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
}
@@@ -2491,10 -2487,7 +2491,10 @@@ adjudge_to_death
}
if (sk->sk_state == TCP_CLOSE) {
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ struct request_sock *req;
+
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ lockdep_sock_is_held(sk));
/* We could get here with a non-NULL req if the socket is
* aborted (e.g., closed with unread data) before 3WHS
* finishes.
@@@ -2566,7 -2559,6 +2566,7 @@@ int tcp_disconnect(struct sock *sk, in
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int old_state = sk->sk_state;
+ u32 seq;
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
@@@ -2593,7 -2585,7 +2593,7 @@@
__kfree_skb(sk->sk_rx_skb_cache);
sk->sk_rx_skb_cache = NULL;
}
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
@@@ -2609,12 -2601,9 +2609,12 @@@
tp->srtt_us = 0;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
tp->rcv_rtt_last_tsecr = 0;
- tp->write_seq += tp->max_window + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+
+ seq = tp->write_seq + tp->max_window + 2;
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
+
icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
@@@ -2668,7 -2657,6 +2668,7 @@@
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
inet->defer_connect = 0;
+ tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
@@@ -2942,9 -2930,9 +2942,9 @@@ static int do_tcp_setsockopt(struct soc
if (sk->sk_state != TCP_CLOSE)
err = -EPERM;
else if (tp->repair_queue == TCP_SEND_QUEUE)
- tp->write_seq = val;
+ WRITE_ONCE(tp->write_seq, val);
else if (tp->repair_queue == TCP_RECV_QUEUE)
- tp->rcv_nxt = val;
+ WRITE_ONCE(tp->rcv_nxt, val);
else
err = -EINVAL;
break;
@@@ -3227,8 -3215,8 +3227,8 @@@ void tcp_get_info(struct sock *sk, stru
* tcpi_unacked -> Number of children ready for accept()
* tcpi_sacked -> max backlog
*/
- info->tcpi_unacked = sk->sk_ack_backlog;
- info->tcpi_sacked = sk->sk_max_ack_backlog;
+ info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
+ info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
return;
}
@@@ -3308,7 -3296,6 +3308,7 @@@
info->tcpi_reord_seen = tp->reord_seen;
info->tcpi_rcv_ooopack = tp->rcv_ooopack;
info->tcpi_snd_wnd = tp->snd_wnd;
+ info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@@ -3844,13 -3831,7 +3844,13 @@@ EXPORT_SYMBOL(tcp_md5_hash_key)
void tcp_done(struct sock *sk)
{
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ struct request_sock *req;
+
+ /* We might be called with a new socket, after
+ * inet_csk_prepare_forced_close() has been called
+ * so we can not use lockdep_sock_is_held(sk)
+ */
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
@@@ -3949,7 -3930,7 +3949,7 @@@ void __init tcp_init(void
BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_member(struct sk_buff, cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
diff --combined net/ipv6/ip6_gre.c
index 923034c52ce4,be1a8c823b2a..f20561b7f667
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@@ -980,9 -980,9 +980,9 @@@ static netdev_tx_t ip6erspan_tunnel_xmi
dsfield = key->tos;
if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
goto tx_err;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
+ if (tun_info->options_len < sizeof(*md))
goto tx_err;
+ md = ip_tunnel_info_opts(tun_info);
tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
@@@ -2170,8 -2170,8 +2170,8 @@@ static const struct nla_policy ip6gre_p
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_member(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_member(struct ipv6hdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
[IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
@@@ -2192,7 -2192,6 +2192,7 @@@ static void ip6erspan_tap_setup(struct
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6erspan_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
diff --combined net/netfilter/nf_tables_api.c
index 062b73a83af0,ec252362d2ab..d0256d54d47f
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@@ -151,64 -151,11 +151,64 @@@ static void nft_set_trans_bind(const st
}
}
+static int nft_netdev_register_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err, j;
+
+ j = 0;
+ list_for_each_entry(hook, hook_list, list) {
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_register;
+
+ j++;
+ }
+ return 0;
+
+err_register:
+ list_for_each_entry(hook, hook_list, list) {
+ if (j-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ }
+ return err;
+}
+
+static void nft_netdev_unregister_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list)
+ nf_unregister_net_hook(net, &hook->ops);
+}
+
+static int nft_register_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+ return nf_register_net_hook(net, &basechain->ops);
+}
+
+static void nft_unregister_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@@ -221,14 -168,14 +221,14 @@@
if (basechain->type->ops_register)
return basechain->type->ops_register(net, ops);
- return nf_register_net_hook(net, ops);
+ return nft_register_basechain_hooks(net, table->family, basechain);
}
static void nf_tables_unregister_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@@ -240,7 -187,7 +240,7 @@@
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
- nf_unregister_net_hook(net, ops);
+ nft_unregister_basechain_hooks(net, table->family, basechain);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@@ -361,7 -308,6 +361,7 @@@ static struct nft_trans *nft_trans_rule
static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
{
+ struct nft_flow_rule *flow;
struct nft_trans *trans;
int err;
@@@ -369,16 -315,6 +369,16 @@@
if (trans == NULL)
return -ENOMEM;
+ if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(ctx->net, rule);
+ if (IS_ERR(flow)) {
+ nft_trans_destroy(trans);
+ return PTR_ERR(flow);
+ }
+
+ nft_trans_flow_rule(trans) = flow;
+ }
+
err = nf_tables_delrule_deactivate(ctx, rule);
if (err < 0) {
nft_trans_destroy(trans);
@@@ -806,8 -742,7 +806,8 @@@ static void nft_table_disable(struct ne
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ nft_unregister_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
}
}
@@@ -822,16 -757,14 +822,16 @@@ static int nf_tables_table_enable(struc
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ err = nft_register_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
if (err < 0)
- goto err;
+ goto err_register_hooks;
i++;
}
return 0;
-err:
+
+err_register_hooks:
if (i)
nft_table_disable(net, table, i);
return err;
@@@ -1292,46 -1225,6 +1292,46 @@@ nla_put_failure
return -ENOSPC;
}
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+ const struct nft_base_chain *basechain)
+{
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nft_hook *hook, *first = NULL;
+ struct nlattr *nest, *nest_devs;
+ int n = 0;
+
+ nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+
+ if (family == NFPROTO_NETDEV) {
+ nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (!first)
+ first = hook;
+
+ if (nla_put_string(skb, NFTA_DEVICE_NAME,
+ hook->ops.dev->name))
+ goto nla_put_failure;
+ n++;
+ }
+ nla_nest_end(skb, nest_devs);
+
+ if (n == 1 &&
+ nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+
+ return 0;
+nla_put_failure:
+ return -1;
+}
+
static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table,
@@@ -1360,10 -1253,21 +1360,10 @@@
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops;
struct nft_stats __percpu *stats;
- struct nlattr *nest;
- nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
- if (nest == NULL)
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
- goto nla_put_failure;
- if (basechain->dev_name[0] &&
- nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
+ if (nft_dump_basechain_hook(skb, family, basechain))
goto nla_put_failure;
- nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
htonl(basechain->policy)))
@@@ -1557,9 -1461,8 +1557,9 @@@ static void nft_chain_stats_replace(str
if (!nft_trans_chain_stats(trans))
return;
- rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
- lockdep_commit_lock_is_held(trans->ctx.net));
+ nft_trans_chain_stats(trans) =
+ rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans),
+ lockdep_commit_lock_is_held(trans->ctx.net));
if (!nft_trans_chain_stats(trans))
static_branch_inc(&nft_counters_enabled);
@@@ -1582,7 -1485,6 +1582,7 @@@ static void nf_tables_chain_free_chain_
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
+ struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
return;
@@@ -1593,13 -1495,6 +1593,13 @@@
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
+ if (ctx->family == NFPROTO_NETDEV) {
+ list_for_each_entry_safe(hook, next,
+ &basechain->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
module_put(basechain->type->owner);
if (rcu_access_pointer(basechain->stats)) {
static_branch_dec(&nft_counters_enabled);
@@@ -1613,125 -1508,13 +1613,125 @@@
}
}
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+ const struct nlattr *attr)
+{
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ struct nft_hook *hook;
+ int err;
+
+ hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+ if (!hook) {
+ err = -ENOMEM;
+ goto err_hook_alloc;
+ }
+
+ nla_strlcpy(ifname, attr, IFNAMSIZ);
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err_hook_dev;
+ }
+ hook->ops.dev = dev;
+
+ return hook;
+
+err_hook_dev:
+ kfree(hook);
+err_hook_alloc:
+ return ERR_PTR(err);
+}
+
+static bool nft_hook_list_find(struct list_head *hook_list,
+ const struct nft_hook *this)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list) {
+ if (this->ops.dev == hook->ops.dev)
+ return true;
+ }
+
+ return false;
+}
+
+static int nf_tables_parse_netdev_hooks(struct net *net,
+ const struct nlattr *attr,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook, *next;
+ const struct nlattr *tmp;
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err_hook;
+ }
+
+ hook = nft_netdev_hook_alloc(net, tmp);
+ if (IS_ERR(hook)) {
+ err = PTR_ERR(hook);
+ goto err_hook;
+ }
+ if (nft_hook_list_find(hook_list, hook)) {
+ err = -EEXIST;
+ goto err_hook;
+ }
+ list_add_tail(&hook->list, hook_list);
+ n++;
+
+ if (n == NFT_NETDEVICE_MAX) {
+ err = -EFBIG;
+ goto err_hook;
+ }
+ }
+ if (!n)
+ return -EINVAL;
+
+ return 0;
+
+err_hook:
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ list_del(&hook->list);
+ kfree(hook);
+ }
+ return err;
+}
+
struct nft_chain_hook {
u32 num;
s32 priority;
const struct nft_chain_type *type;
- struct net_device *dev;
+ struct list_head list;
};
+static int nft_chain_parse_netdev(struct net *net,
+ struct nlattr *tb[],
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err;
+
+ if (tb[NFTA_HOOK_DEV]) {
+ hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ list_add_tail(&hook->list, hook_list);
+ } else if (tb[NFTA_HOOK_DEVS]) {
+ err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
+ hook_list);
+ if (err < 0)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
struct nft_chain_hook *hook, u8 family,
@@@ -1739,6 -1522,7 +1739,6 @@@
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
- struct net_device *dev;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
@@@ -1776,14 -1560,23 +1776,14 @@@
hook->type = type;
- hook->dev = NULL;
+ INIT_LIST_HEAD(&hook->list);
if (family == NFPROTO_NETDEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = __dev_get_by_name(net, ifname);
- if (!dev) {
+ err = nft_chain_parse_netdev(net, ha, &hook->list);
+ if (err < 0) {
module_put(type->owner);
- return -ENOENT;
+ return err;
}
- hook->dev = dev;
- } else if (ha[NFTA_HOOK_DEV]) {
+ } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
module_put(type->owner);
return -EOPNOTSUPP;
}
@@@ -1793,12 -1586,6 +1793,12 @@@
static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
+ struct nft_hook *h, *next;
+
+ list_for_each_entry_safe(h, next, &hook->list, list) {
+ list_del(&h->list);
+ kfree(h);
+ }
module_put(hook->type->owner);
}
@@@ -1823,49 -1610,6 +1823,49 @@@ static struct nft_rule **nf_tables_chai
return kvmalloc(alloc, GFP_KERNEL);
}
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
+ const struct nft_chain_hook *hook,
+ struct nft_chain *chain)
+{
+ ops->pf = family;
+ ops->hooknum = hook->num;
+ ops->priority = hook->priority;
+ ops->priv = chain;
+ ops->hook = hook->type->hooks[ops->hooknum];
+}
+
+static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+ struct nft_chain_hook *hook, u32 flags)
+{
+ struct nft_chain *chain;
+ struct nft_hook *h;
+
+ basechain->type = hook->type;
+ INIT_LIST_HEAD(&basechain->hook_list);
+ chain = &basechain->chain;
+
+ if (family == NFPROTO_NETDEV) {
+ list_splice_init(&hook->list, &basechain->hook_list);
+ list_for_each_entry(h, &basechain->hook_list, list)
+ nft_basechain_hook_init(&h->ops, family, hook, chain);
+
+ basechain->ops.hooknum = hook->num;
+ basechain->ops.priority = hook->priority;
+ } else {
+ nft_basechain_hook_init(&basechain->ops, family, hook, chain);
+ }
+
+ chain->flags |= NFT_BASE_CHAIN | flags;
+ basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ nft_chain_offload_priority(basechain) < 0)
+ return -EOPNOTSUPP;
+
+ flow_block_init(&basechain->flow_block);
+
+ return 0;
+}
+
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags)
{
@@@ -1884,6 -1628,7 +1884,6 @@@
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
err = nft_chain_parse_hook(net, nla, &hook, family, true);
if (err < 0)
@@@ -1894,7 -1639,9 +1894,7 @@@
nft_chain_release_hook(&hook);
return -ENOMEM;
}
-
- if (hook.dev != NULL)
- strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
+ chain = &basechain->chain;
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@@ -1907,12 -1654,24 +1907,12 @@@
static_branch_inc(&nft_counters_enabled);
}
- basechain->type = hook.type;
- chain = &basechain->chain;
-
- ops = &basechain->ops;
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = hook.type->hooks[ops->hooknum];
- ops->dev = hook.dev;
-
- chain->flags |= NFT_BASE_CHAIN | flags;
- basechain->policy = NF_ACCEPT;
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
- return -EOPNOTSUPP;
-
- flow_block_init(&basechain->flow_block);
+ err = nft_basechain_init(basechain, family, &hook, flags);
+ if (err < 0) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
+ return err;
+ }
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@@ -1972,25 -1731,6 +1972,25 @@@ err1
return err;
}
+static bool nft_hook_list_equal(struct list_head *hook_list1,
+ struct list_head *hook_list2)
+{
+ struct nft_hook *hook;
+ int n = 0, m = 0;
+
+ n = 0;
+ list_for_each_entry(hook, hook_list2, list) {
+ if (!nft_hook_list_find(hook_list1, hook))
+ return false;
+
+ n++;
+ }
+ list_for_each_entry(hook, hook_list1, list)
+ m++;
+
+ return n == m;
+}
+
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags)
{
@@@ -2022,19 -1762,12 +2022,19 @@@
return -EBUSY;
}
- ops = &basechain->ops;
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
+ if (ctx->family == NFPROTO_NETDEV) {
+ if (!nft_hook_list_equal(&basechain->hook_list,
+ &hook.list)) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ } else {
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
}
nft_chain_release_hook(&hook);
}
@@@ -2189,7 -1922,6 +2189,7 @@@ static int nf_tables_newchain(struct ne
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ flags |= chain->flags & NFT_BASE_CHAIN;
return nf_tables_updchain(&ctx, genmask, policy, flags);
}
@@@ -5411,6 -5143,9 +5411,6 @@@ static int nf_tables_updobj(const struc
struct nft_trans *trans;
int err;
- if (!obj->ops->update)
- return -EOPNOTSUPP;
-
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
@@@ -5847,7 -5582,6 +5847,7 @@@ static const struct nla_policy nft_flow
.len = NFT_NAME_MAXLEN - 1 },
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
[NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
};
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
@@@ -5894,6 -5628,43 +5894,6 @@@ nft_flowtable_lookup_byhandle(const str
return ERR_PTR(-ENOENT);
}
-static int nf_tables_parse_devices(const struct nft_ctx *ctx,
- const struct nlattr *attr,
- struct net_device *dev_array[], int *len)
-{
- const struct nlattr *tmp;
- struct net_device *dev;
- char ifname[IFNAMSIZ];
- int rem, n = 0, err;
-
- nla_for_each_nested(tmp, attr, rem) {
- if (nla_type(tmp) != NFTA_DEVICE_NAME) {
- err = -EINVAL;
- goto err1;
- }
-
- nla_strlcpy(ifname, tmp, IFNAMSIZ);
- dev = __dev_get_by_name(ctx->net, ifname);
- if (!dev) {
- err = -ENOENT;
- goto err1;
- }
-
- dev_array[n++] = dev;
- if (n == NFT_FLOWTABLE_DEVICE_MAX) {
- err = -EFBIG;
- goto err1;
- }
- }
- if (!len)
- return -EINVAL;
-
- err = 0;
-err1:
- *len = n;
- return err;
-}
-
static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] =
{
[NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
[NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
@@@ -5904,10 -5675,11 +5904,10 @@@ static int nf_tables_flowtable_parse_ho
const struct nlattr *attr,
struct nft_flowtable *flowtable)
{
- struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
- struct nf_hook_ops *ops;
+ struct nft_hook *hook;
int hooknum, priority;
- int err, n = 0, i;
+ int err;
err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
nft_flowtable_hook_policy, NULL);
@@@ -5925,21 -5697,27 +5925,21 @@@
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
- err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
- dev_array, &n);
+ err = nf_tables_parse_netdev_hooks(ctx->net,
+ tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ &flowtable->hook_list);
if (err < 0)
return err;
- ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
-
- flowtable->hooknum = hooknum;
- flowtable->priority = priority;
- flowtable->ops = ops;
- flowtable->ops_len = n;
+ flowtable->hooknum = hooknum;
+ flowtable->data.priority = priority;
- for (i = 0; i < n; i++) {
- flowtable->ops[i].pf = NFPROTO_NETDEV;
- flowtable->ops[i].hooknum = hooknum;
- flowtable->ops[i].priority = priority;
- flowtable->ops[i].priv = &flowtable->data;
- flowtable->ops[i].hook = flowtable->data.type->hook;
- flowtable->ops[i].dev = dev_array[i];
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ hook->ops.pf = NFPROTO_NETDEV;
+ hook->ops.hooknum = hooknum;
+ hook->ops.priority = priority;
+ hook->ops.priv = &flowtable->data;
+ hook->ops.hook = flowtable->data.type->hook;
}
return err;
@@@ -5976,73 -5754,17 +5976,73 @@@ nft_flowtable_type_get(struct net *net
return ERR_PTR(-ENOENT);
}
+static void nft_unregister_flowtable_hook(struct net *net,
+ struct nft_flowtable *flowtable,
+ struct nft_hook *hook)
+{
+ nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+}
+
static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
+ list_for_each_entry(hook, &flowtable->hook_list, list)
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+}
+
+static int nft_register_flowtable_net_hooks(struct net *net,
+ struct nft_table *table,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook, *hook2, *next;
+ struct nft_flowtable *ft;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ list_for_each_entry(ft, &table->flowtables, list) {
+ list_for_each_entry(hook2, &ft->hook_list, list) {
+ if (hook->ops.dev == hook2->ops.dev &&
+ hook->ops.pf == hook2->ops.pf) {
+ err = -EBUSY;
+ goto err_unregister_net_hooks;
+ }
+ }
+ }
+
+ err = flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_BIND);
+ if (err < 0)
+ goto err_unregister_net_hooks;
+
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0) {
+ flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ goto err_unregister_net_hooks;
+ }
+
+ i++;
+ }
+
+ return 0;
- nf_unregister_net_hook(net, &flowtable->ops[i]);
+err_unregister_net_hooks:
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
}
+
+ return err;
}
static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
@@@ -6053,13 -5775,12 +6053,13 @@@
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nf_flowtable_type *type;
- struct nft_flowtable *flowtable, *ft;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_hook *hook, *next;
struct nft_table *table;
struct nft_ctx ctx;
- int err, i, k;
+ int err;
if (!nla[NFTA_FLOWTABLE_TABLE] ||
!nla[NFTA_FLOWTABLE_NAME] ||
@@@ -6098,7 -5819,6 +6098,7 @@@
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&flowtable->hook_list);
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
if (!flowtable->name) {
@@@ -6112,14 -5832,6 +6112,14 @@@
goto err2;
}
+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+ flowtable->data.flags =
+ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+ if (flowtable->data.flags & ~NF_FLOWTABLE_HW_OFFLOAD)
+ goto err3;
+ }
+
+
write_pnet(&flowtable->data.net, net);
flowtable->data.type = type;
err = type->init(&flowtable->data);
if (err < 0)
@@@ -6130,24 -5842,43 +6130,24 @@@
if (err < 0)
goto err4;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
-
- list_for_each_entry(ft, &table->flowtables, list) {
- for (k = 0; k < ft->ops_len; k++) {
- if (!ft->ops[k].dev)
- continue;
-
- if (flowtable->ops[i].dev == ft->ops[k].dev &&
- flowtable->ops[i].pf == ft->ops[k].pf) {
- err = -EBUSY;
- goto err5;
- }
- }
- }
-
- err = nf_register_net_hook(net, &flowtable->ops[i]);
- if (err < 0)
- goto err5;
- }
+ err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
+ if (err < 0)
+ goto err4;
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err6;
+ goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
table->use++;
return 0;
-err6:
- i = flowtable->ops_len;
err5:
- for (k = i - 1; k >= 0; k--)
- nf_unregister_net_hook(net, &flowtable->ops[k]);
-
- kfree(flowtable->ops);
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
err4:
flowtable->data.type->free(&flowtable->data);
err3:
@@@ -6214,8 -5945,8 +6214,8 @@@ static int nf_tables_fill_flowtable_inf
{
struct nlattr *nest, *nest_devs;
struct nfgenmsg *nfmsg;
+ struct nft_hook *hook;
struct nlmsghdr *nlh;
- int i;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
@@@ -6231,23 -5962,25 +6231,23 @@@
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
- NFTA_FLOWTABLE_PAD))
+ NFTA_FLOWTABLE_PAD) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
if (!nest)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
- nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
goto nla_put_failure;
nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
if (!nest_devs)
goto nla_put_failure;
- for (i = 0; i < flowtable->ops_len; i++) {
- const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
-
- if (dev &&
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
+ list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
+ if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@@ -6438,12 -6171,7 +6438,12 @@@ err
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
- kfree(flowtable->ops);
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree(hook);
+ }
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
@@@ -6483,15 -6211,14 +6483,15 @@@ nla_put_failure
static void nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->ops[i].dev != dev)
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ if (hook->ops.dev != dev)
continue;
- nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
- flowtable->ops[i].dev = NULL;
+ nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
break;
}
}
@@@ -6772,8 -6499,7 +6772,8 @@@ static void nft_obj_commit_update(struc
obj = nft_trans_obj(trans);
newobj = nft_trans_obj_newobj(trans);
- obj->ops->update(obj, newobj);
+ if (obj->ops->update)
+ obj->ops->update(obj, newobj);
kfree(newobj);
}
@@@ -7595,7 -7321,7 +7595,7 @@@ int nft_validate_register_load(enum nft
return -EINVAL;
if (len == 0)
return -EINVAL;
- if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
+ if (reg * NFT_REG32_SIZE + len > sizeof_member(struct nft_regs, data))
return -ERANGE;
return 0;
@@@ -7643,7 -7369,7 +7643,7 @@@ int nft_validate_register_store(const s
if (len == 0)
return -EINVAL;
if (reg * NFT_REG32_SIZE + len >
- FIELD_SIZEOF(struct nft_regs, data))
+ sizeof_member(struct nft_regs, data))
return -ERANGE;
if (data != NULL && type != NFT_DATA_VALUE)
diff --combined net/openvswitch/datapath.c
index 293d5289c4a1,4c412b2b27b4..2dfb1d6117a8
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@@ -227,8 -227,7 +227,8 @@@ void ovs_dp_process_packet(struct sk_bu
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+ &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@@ -350,8 -349,7 +350,8 @@@ static size_t upcall_msg_size(const str
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@@ -394,7 -392,6 +394,7 @@@ static int queue_userspace_packet(struc
size_t len;
unsigned int hlen;
int err, dp_ifindex;
+ u64 hash;
dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
@@@ -487,30 -484,23 +487,30 @@@
}
/* Add OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru) {
- if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
- upcall_info->mru)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (upcall_info->mru &&
+ nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
- if (cutlen > 0) {
- if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
- skb->len)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (cutlen > 0 &&
+ nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ /* Add OVS_PACKET_ATTR_HASH */
+ hash = skb_get_hash_raw(skb);
+ if (skb->sw_hash)
+ hash |= OVS_PACKET_HASH_SW_BIT;
+
+ if (skb->l4_hash)
+ hash |= OVS_PACKET_HASH_L4_BIT;
+
+ if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Only reserve room for attribute header, packet data is added
@@@ -552,7 -542,6 +552,7 @@@ static int ovs_packet_cmd_execute(struc
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
+ u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@@ -578,14 -567,6 +578,14 @@@
}
OVS_CB(packet)->mru = mru;
+ if (a[OVS_PACKET_ATTR_HASH]) {
+ hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+ __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+ !!(hash & OVS_PACKET_HASH_SW_BIT),
+ !!(hash & OVS_PACKET_HASH_L4_BIT));
+ }
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@@ -723,13 -704,9 +723,13 @@@ static size_t ovs_flow_cmd_msg_size(con
{
size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
- /* OVS_FLOW_ATTR_UFID */
+ /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
+ * see ovs_nla_put_identifier()
+ */
if (sfid && ovs_identifier_is_ufid(sfid))
len += nla_total_size(sfid->ufid_len);
+ else
+ len += nla_total_size(ovs_key_attr_size());
/* OVS_FLOW_ATTR_KEY */
if (!sfid || should_fill_key(sfid, ufid_flags))
@@@ -1598,31 -1575,6 +1598,31 @@@ static int ovs_dp_change(struct datapat
return 0;
}
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
+}
+
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@@ -1631,7 -1583,7 +1631,7 @@@
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@@ -1644,26 -1596,35 +1644,26 @@@
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
-
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@@ -1695,7 -1656,6 +1695,7 @@@
ovs_dp_reset_user_features(skb, info);
}
+ ovs_unlock();
goto err_destroy_meters;
}
@@@ -1712,16 -1672,17 +1712,16 @@@
return 0;
err_destroy_meters:
- ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
@@@ -1920,7 -1881,7 +1920,7 @@@ static struct genl_family dp_datapath_g
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
struct net *net, u32 portid, u32 seq,
- u32 flags, u8 cmd)
+ u32 flags, u8 cmd, gfp_t gfp)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@@ -1941,7 -1902,7 +1941,7 @@@
goto nla_put_failure;
if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev));
+ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
goto nla_put_failure;
@@@ -1982,12 -1943,11 +1982,12 @@@ struct sk_buff *ovs_vport_cmd_build_inf
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+ GFP_KERNEL);
BUG_ON(retval < 0);
return skb;
@@@ -2129,7 -2089,7 +2129,7 @@@ restart
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_NEW, GFP_KERNEL);
new_headroom = netdev_get_fwd_headroom(vport->dev);
@@@ -2190,7 -2150,7 +2190,7 @@@ static int ovs_vport_cmd_set(struct sk_
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_SET);
+ OVS_VPORT_CMD_SET, GFP_KERNEL);
BUG_ON(err < 0);
ovs_unlock();
@@@ -2230,7 -2190,7 +2230,7 @@@ static int ovs_vport_cmd_del(struct sk_
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_DEL);
+ OVS_VPORT_CMD_DEL, GFP_KERNEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
@@@ -2277,7 -2237,7 +2277,7 @@@ static int ovs_vport_cmd_get(struct sk_
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_GET);
+ OVS_VPORT_CMD_GET, GFP_ATOMIC);
BUG_ON(err < 0);
rcu_read_unlock();
@@@ -2313,8 -2273,7 +2313,8 @@@ static int ovs_vport_cmd_dump(struct sk
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_GET) < 0)
+ OVS_VPORT_CMD_GET,
+ GFP_ATOMIC) < 0)
goto out;
j++;
@@@ -2490,7 -2449,7 +2490,7 @@@ static int __init dp_init(void
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_member(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
diff --combined net/openvswitch/flow.h
index fd8ed766bdd1,c9987cd5e03c..23cb757b11b5
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@@ -30,14 -30,13 +30,14 @@@ enum sw_flow_mac_proto
MAC_PROTO_ETHERNET,
};
#define SW_FLOW_KEY_INVALID 0x80
+#define MPLS_LABEL_DEPTH 3
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
*/
#define TUN_METADATA_OFFSET(opt_len) \
- (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+ (sizeof_member(struct sw_flow_key, tun_opts) - opt_len)
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
@@@ -52,7 -51,7 +52,7 @@@ struct vlan_head
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
- FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+ sizeof_member(struct sw_flow_key, recirc_id))
struct ovs_key_nsh {
struct ovs_nsh_key_base base;
@@@ -85,6 -84,9 +85,6 @@@ struct sw_flow_key
* protocol.
*/
union {
- struct {
- __be32 top_lse; /* top label stack entry */
- } mpls;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
@@@ -133,11 -135,6 +133,11 @@@
} nd;
};
} ipv6;
+ struct {
+ u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+ __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */
+ } mpls;
+
struct ovs_key_nsh nsh; /* network service header */
};
struct {
@@@ -169,6 -166,7 +169,6 @@@ struct sw_flow_key_range
struct sw_flow_mask {
int ref_count;
struct rcu_head rcu;
- struct list_head list;
struct sw_flow_key_range range;
struct sw_flow_key key;
};
diff --combined net/sched/act_ct.c
index ae0de372b1c8,3ee22bbac3ff..757d47bced37
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@@ -312,7 -312,7 +312,7 @@@ static void tcf_ct_act_set_labels(struc
u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
- size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels);
+ size_t labels_sz = sizeof_member(struct tcf_ct_params, labels);
if (!memchr_inv(labels_m, 0, labels_sz))
return;
@@@ -465,15 -465,16 +465,15 @@@ out_push
skb_push_rcsum(skb, nh_ofs);
out:
- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ tcf_action_update_bstats(&c->common, skb);
return retval;
drop:
- qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
+ tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
}
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
- [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 },
[TCA_CT_ACTION] = { .type = NLA_U16 },
[TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
[TCA_CT_ZONE] = { .type = NLA_U16 },
@@@ -655,7 -656,7 +655,7 @@@ static int tcf_ct_fill_params(struct ne
static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int replace, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ct_net_id);
@@@ -687,8 -688,8 +687,8 @@@
return err;
if (!err) {
- err = tcf_idr_create(tn, index, est, a,
- &act_ct_ops, bind, true);
+ err = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_ct_ops, bind, flags);
if (err) {
tcf_idr_cleanup(tn, index);
return err;
@@@ -721,8 -722,7 +721,8 @@@
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock));
+ params = rcu_replace_pointer(c->params, params,
+ lockdep_is_held(&c->tcf_lock));
spin_unlock_bh(&c->tcf_lock);
if (goto_ch)
@@@ -905,7 -905,11 +905,7 @@@ static void tcf_stats_update(struct tc_
{
struct tcf_ct *c = to_ct(a);
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
@@@ -925,7 -929,7 +925,7 @@@ static struct tc_action_ops act_ct_ops
static __net_init int ct_init_net(struct net *net)
{
- unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8;
+ unsigned int n_bits = sizeof_member(struct tcf_ct_params, labels) * 8;
struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
if (nf_connlabels_get(net, n_bits - 1)) {
diff --combined net/sched/cls_flower.c
index c307ee1d6ca6,4e8a3377ac4e..66948c4c1701
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@@ -22,8 -22,6 +22,8 @@@
#include <net/ip.h>
#include <net/flow_dissector.h>
#include <net/geneve.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@@ -690,11 -688,7 +690,11 @@@ static const struct nla_policy fl_polic
static const struct nla_policy
enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPTS_UNSPEC] = {
+ .strict_start_type = TCA_FLOWER_KEY_ENC_OPTS_VXLAN },
[TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@@ -705,19 -699,6 +705,19 @@@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OP
.len = 128 },
};
+static const struct nla_policy
+vxlan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
+};
+
static void fl_set_key_val(struct nlattr **tb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@@ -947,105 -928,6 +947,105 @@@ static int fl_set_geneve_opt(const stru
return sizeof(struct geneve_opt) + data_len;
}
+static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1];
+ struct vxlan_metadata *md;
+ int err;
+
+ md = (struct vxlan_metadata *)&key->enc_opts.data[key->enc_opts.len];
+ memset(md, 0xff, sizeof(*md));
+
+ if (!depth)
+ return sizeof(*md);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_VXLAN) {
+ NL_SET_ERR_MSG(extack, "Non-vxlan option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX, nla,
+ vxlan_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP])
+ md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]);
+
+ return sizeof(*md);
+}
+
+static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1];
+ struct erspan_metadata *md;
+ int err;
+
+ md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len];
+ memset(md, 0xff, sizeof(*md));
+ md->version = 1;
+
+ if (!depth)
+ return sizeof(*md);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_ERSPAN) {
+ NL_SET_ERR_MSG(extack, "Non-erspan option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX, nla,
+ erspan_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER])
+ md->version = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]);
+
+ if (md->version == 1) {
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
+ return -EINVAL;
+ }
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
+ nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
+ md->u.index = nla_get_be32(nla);
+ }
+ } else if (md->version == 2) {
+ if (!option_len && (!tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID])) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
+ return -EINVAL;
+ }
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) {
+ nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR];
+ md->u.md2.dir = nla_get_u8(nla);
+ }
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) {
+ nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID];
+ set_hwid(&md->u.md2, nla_get_u8(nla));
+ }
+ } else {
+ NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect");
+ return -EINVAL;
+ }
+
+ return sizeof(*md);
+}
+
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@@ -1076,11 -958,6 +1076,11 @@@
nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
switch (nla_type(nla_opt_key)) {
case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
+ if (key->enc_opts.dst_opt_type &&
+ key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
+ return -EINVAL;
+ }
option_len = 0;
key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
option_len = fl_set_geneve_opt(nla_opt_key, key,
@@@ -1106,72 -983,6 +1106,72 @@@
return -EINVAL;
}
+ if (msk_depth)
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ break;
+ case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ option_len = fl_set_vxlan_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ option_len = fl_set_vxlan_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+
+ if (msk_depth)
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ break;
+ case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for erspan options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ option_len = fl_set_erspan_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ option_len = fl_set_erspan_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+
if (msk_depth)
nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
break;
@@@ -1474,7 -1285,7 +1474,7 @@@ static int fl_init_mask_hashtable(struc
}
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
- #define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)
+ #define FL_KEY_MEMBER_SIZE(member) sizeof_member(struct fl_flow_key, member)
#define FL_KEY_IS_MASKED(mask, member) \
memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \
@@@ -2324,61 -2135,6 +2324,61 @@@ nla_put_failure
return -EMSGSIZE;
}
+static int fl_dump_key_vxlan_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct vxlan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_VXLAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ md = (struct vxlan_metadata *)&enc_opts->data[0];
+ if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, md->gbp))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int fl_dump_key_erspan_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct erspan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_ERSPAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ md = (struct erspan_metadata *)&enc_opts->data[0];
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, md->version))
+ goto nla_put_failure;
+
+ if (md->version == 1 &&
+ nla_put_be32(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index))
+ goto nla_put_failure;
+
+ if (md->version == 2 &&
+ (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR,
+ md->u.md2.dir) ||
+ nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID,
+ get_hwid(&md->u.md2))))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int fl_dump_key_ct(struct sk_buff *skb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask)
@@@ -2432,16 -2188,6 +2432,16 @@@ static int fl_dump_key_options(struct s
if (err)
goto nla_put_failure;
break;
+ case TUNNEL_VXLAN_OPT:
+ err = fl_dump_key_vxlan_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
+ case TUNNEL_ERSPAN_OPT:
+ err = fl_dump_key_erspan_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
default:
goto nla_put_failure;
}
diff --combined net/sctp/socket.c
index 0b485952a71c,4c3105d96dce..140a13cfc93b
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@@ -384,7 -384,7 +384,7 @@@ static int sctp_do_bind(struct sock *sk
}
}
- if (snum && snum < inet_prot_sock(net) &&
+ if (snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
@@@ -1061,7 -1061,7 +1061,7 @@@ static int sctp_connect_new_asoc(struc
if (sctp_autobind(sk))
return -EAGAIN;
} else {
- if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+ if (inet_port_requires_bind_service(net, ep->base.bind_addr.port) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
}
@@@ -3943,22 -3943,18 +3943,22 @@@ static int sctp_setsockopt_auto_asconf(
*/
static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
char __user *optval,
- unsigned int optlen)
+ unsigned int optlen, bool v2)
{
- struct sctp_paddrthlds val;
+ struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
+ int len;
- if (optlen < sizeof(struct sctp_paddrthlds))
+ len = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
+ if (optlen < len)
return -EINVAL;
- if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval,
- sizeof(struct sctp_paddrthlds)))
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
+ if (v2 && val.spt_pathpfthld > val.spt_pathcpthld)
+ return -EINVAL;
+
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
trans = sctp_addr_id2transport(sk, &val.spt_address,
val.spt_assoc_id);
@@@ -3967,8 -3963,6 +3967,8 @@@
if (val.spt_pathmaxrxt)
trans->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ trans->ps_retrans = val.spt_pathcpthld;
trans->pf_retrans = val.spt_pathpfthld;
return 0;
@@@ -3984,23 -3978,17 +3984,23 @@@
transports) {
if (val.spt_pathmaxrxt)
trans->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ trans->ps_retrans = val.spt_pathcpthld;
trans->pf_retrans = val.spt_pathpfthld;
}
if (val.spt_pathmaxrxt)
asoc->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ asoc->ps_retrans = val.spt_pathcpthld;
asoc->pf_retrans = val.spt_pathpfthld;
} else {
struct sctp_sock *sp = sctp_sk(sk);
if (val.spt_pathmaxrxt)
sp->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ sp->ps_retrans = val.spt_pathcpthld;
sp->pf_retrans = val.spt_pathpfthld;
}
@@@ -4601,40 -4589,6 +4601,40 @@@ out
return retval;
}
+static int sctp_setsockopt_pf_expose(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_value > SCTP_PF_EXPOSE_MAX)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ goto out;
+
+ if (asoc)
+ asoc->pf_expose = params.assoc_value;
+ else
+ sctp_sk(sk)->pf_expose = params.assoc_value;
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@@ -4790,12 -4744,7 +4790,12 @@@ static int sctp_setsockopt(struct sock
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
- retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
+ retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
+ false);
+ break;
+ case SCTP_PEER_ADDR_THLDS_V2:
+ retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
+ true);
break;
case SCTP_RECVRCVINFO:
retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
@@@ -4849,9 -4798,6 +4849,9 @@@
case SCTP_ECN_SUPPORTED:
retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
break;
+ case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
+ retval = sctp_setsockopt_pf_expose(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@@ -5095,8 -5041,6 +5095,8 @@@ static int sctp_init_sock(struct sock *
sp->hbinterval = net->sctp.hb_interval;
sp->pathmaxrxt = net->sctp.max_retrans_path;
sp->pf_retrans = net->sctp.pf_retrans;
+ sp->ps_retrans = net->sctp.ps_retrans;
+ sp->pf_expose = net->sctp.pf_expose;
sp->pathmtu = 0; /* allow default discovery */
sp->sackdelay = net->sctp.sack_timeout;
sp->sackfreq = 2;
@@@ -5577,16 -5521,8 +5577,16 @@@ static int sctp_getsockopt_peer_addr_in
transport = sctp_addr_id2transport(sk, &pinfo.spinfo_address,
pinfo.spinfo_assoc_id);
- if (!transport)
- return -EINVAL;
+ if (!transport) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (transport->state == SCTP_PF &&
+ transport->asoc->pf_expose == SCTP_PF_EXPOSE_DISABLE) {
+ retval = -EACCES;
+ goto out;
+ }
pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
pinfo.spinfo_state = transport->state;
@@@ -7234,19 -7170,18 +7234,19 @@@ static int sctp_getsockopt_assoc_ids(st
*
http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
*/
static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
- char __user *optval,
- int len,
- int __user *optlen)
+ char __user *optval, int len,
+ int __user *optlen, bool v2)
{
- struct sctp_paddrthlds val;
+ struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
+ int min;
- if (len < sizeof(struct sctp_paddrthlds))
+ min = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
+ if (len < min)
return -EINVAL;
- len = sizeof(struct sctp_paddrthlds);
- if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len))
+ len = min;
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
@@@ -7257,7 -7192,6 +7257,7 @@@
val.spt_pathmaxrxt = trans->pathmaxrxt;
val.spt_pathpfthld = trans->pf_retrans;
+ val.spt_pathcpthld = trans->ps_retrans;
goto out;
}
@@@ -7270,13 -7204,11 +7270,13 @@@
if (asoc) {
val.spt_pathpfthld = asoc->pf_retrans;
val.spt_pathmaxrxt = asoc->pathmaxrxt;
+ val.spt_pathcpthld = asoc->ps_retrans;
} else {
struct sctp_sock *sp = sctp_sk(sk);
val.spt_pathpfthld = sp->pf_retrans;
val.spt_pathmaxrxt = sp->pathmaxrxt;
+ val.spt_pathcpthld = sp->ps_retrans;
}
out:
@@@ -7968,45 -7900,6 +7968,45 @@@ out
return retval;
}
+static int sctp_getsockopt_pf_expose(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = asoc ? asoc->pf_expose
+ : sctp_sk(sk)->pf_expose;
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@@ -8156,12 -8049,7 +8156,12 @@@
retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
- retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen);
+ retval = sctp_getsockopt_paddr_thresholds(sk, optval, len,
+ optlen, false);
+ break;
+ case SCTP_PEER_ADDR_THLDS_V2:
+ retval = sctp_getsockopt_paddr_thresholds(sk, optval, len,
+ optlen, true);
break;
case SCTP_GET_ASSOC_STATS:
retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen);
@@@ -8224,9 -8112,6 +8224,9 @@@
case SCTP_ECN_SUPPORTED:
retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen);
break;
+ case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
+ retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@@ -8267,7 -8152,6 +8267,7 @@@ static int sctp_get_port_local(struct s
struct sctp_sock *sp = sctp_sk(sk);
bool reuse = (sk->sk_reuse || sp->reuse);
struct sctp_bind_hashbucket *head; /* hash list */
+ struct net *net = sock_net(sk);
kuid_t uid = sock_i_uid(sk);
struct sctp_bind_bucket *pp;
unsigned short snum;
@@@ -8283,6 -8167,7 +8283,6 @@@
/* Search for an available port. */
int low, high, remaining, index;
unsigned int rover;
- struct net *net = sock_net(sk);
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
@@@ -8294,12 -8179,12 +8294,12 @@@
rover = low;
if (inet_is_local_reserved_port(net, rover))
continue;
- index = sctp_phashfn(sock_net(sk), rover);
+ index = sctp_phashfn(net, rover);
head = &sctp_port_hashtable[index];
spin_lock(&head->lock);
sctp_for_each_hentry(pp, &head->chain)
if ((pp->port == rover) &&
- net_eq(sock_net(sk), pp->net))
+ net_eq(net, pp->net))
goto next;
break;
next:
@@@ -8323,10 -8208,10 +8323,10 @@@
* to the port number (snum) - we detect that with the
* port iterator, pp being NULL.
*/
- head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)];
+ head = &sctp_port_hashtable[sctp_phashfn(net, snum)];
spin_lock(&head->lock);
sctp_for_each_hentry(pp, &head->chain) {
- if ((pp->port == snum) && net_eq(pp->net, sock_net(sk)))
+ if ((pp->port == snum) && net_eq(pp->net, net))
goto pp_found;
}
}
@@@ -8382,7 -8267,7 +8382,7 @@@ pp_found
pp_not_found:
/* If there was a hash table miss, create a new port. */
ret = 1;
- if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum)))
+ if (!pp && !(pp = sctp_bucket_create(head, net, snum)))
goto fail_unlock;
/* In either case (hit or miss), make sure fastreuse is 1 only
@@@ -8491,7 -8376,7 +8491,7 @@@ static int sctp_listen_start(struct soc
}
}
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
return sctp_hash_endpoint(ep);
}
@@@ -8545,7 -8430,7 +8545,7 @@@ int sctp_inet_listen(struct socket *soc
/* If we are already listening, just update the backlog */
if (sctp_sstate(sk, LISTENING))
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
else {
err = sctp_listen_start(sk, backlog);
if (err)
@@@ -8591,7 -8476,7 +8591,7 @@@ __poll_t sctp_poll(struct file *file, s
mask = 0;
/* Is there any exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
@@@ -8600,7 -8485,7 +8600,7 @@@
mask |= EPOLLHUP;
/* Is it readable? Reconsider this code with TCP-style support. */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* The association is either gone or not ready. */
@@@ -8986,7 -8871,7 +8986,7 @@@ struct sk_buff *sctp_skb_recv_datagram(
if (sk_can_busy_loop(sk)) {
sk_busy_loop(sk, noblock);
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
continue;
}
@@@ -9421,7 -9306,7 +9421,7 @@@ void sctp_copy_sock(struct sock *newsk
newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
newinet->inet_dport = htons(asoc->peer.port);
newinet->pmtudisc = inet->pmtudisc;
- newinet->inet_id = asoc->next_tsn ^ jiffies;
+ newinet->inet_id = prandom_u32();
newinet->uc_ttl = inet->uc_ttl;
newinet->mc_loop = 1;
@@@ -9615,12 -9500,12 +9615,12 @@@ struct proto sctp_prot =
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
- .get_port = sctp_get_port,
+ .no_autobind = true,
.obj_size = sizeof(struct sctp_sock),
.useroffset = offsetof(struct sctp_sock, subscribe),
.usersize = offsetof(struct sctp_sock, initmsg) -
offsetof(struct sctp_sock, subscribe) +
- sizeof_field(struct sctp_sock, initmsg),
+ sizeof_member(struct sctp_sock, initmsg),
.sysctl_mem = sysctl_sctp_mem,
.sysctl_rmem = sysctl_sctp_rmem,
.sysctl_wmem = sysctl_sctp_wmem,
@@@ -9657,12 -9542,12 +9657,12 @@@ struct proto sctpv6_prot =
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
- .get_port = sctp_get_port,
+ .no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
- sizeof_field(struct sctp6_sock, sctp.initmsg),
+ sizeof_member(struct sctp6_sock, sctp.initmsg),
.sysctl_mem = sysctl_sctp_mem,
.sysctl_rmem = sysctl_sctp_rmem,
.sysctl_wmem = sysctl_sctp_wmem,
diff --combined net/unix/af_unix.c
index 7cfdce10de36,ee9b2d8684c3..f0a074356012
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@@ -284,9 -284,11 +284,9 @@@ static struct sock *__unix_find_socket_
if (u->addr->len == len &&
!memcmp(u->addr->name, sunname, len))
- goto found;
+ return s;
}
- s = NULL;
-found:
- return s;
+ return NULL;
}
static inline struct sock *unix_find_socket_byname(struct net *net,
@@@ -644,9 -646,6 +644,9 @@@ static __poll_t unix_poll(struct file *
static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
+#ifdef CONFIG_COMPAT
+static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
@@@ -688,9 -687,6 +688,9 @@@ static const struct proto_ops unix_stre
.getname = unix_getname,
.poll = unix_poll,
.ioctl = unix_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = unix_compat_ioctl,
+#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
.setsockopt = sock_no_setsockopt,
@@@ -714,9 -710,6 +714,9 @@@ static const struct proto_ops unix_dgra
.getname = unix_getname,
.poll = unix_dgram_poll,
.ioctl = unix_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = unix_compat_ioctl,
+#endif
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.setsockopt = sock_no_setsockopt,
@@@ -739,9 -732,6 +739,9 @@@ static const struct proto_ops unix_seqp
.getname = unix_getname,
.poll = unix_dgram_poll,
.ioctl = unix_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = unix_compat_ioctl,
+#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
.setsockopt = sock_no_setsockopt,
@@@ -2592,13 -2582,6 +2592,13 @@@ static int unix_ioctl(struct socket *so
return err;
}
+#ifdef CONFIG_COMPAT
+static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
@@@ -2616,7 -2599,7 +2616,7 @@@
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
@@@ -2645,7 -2628,7 +2645,7 @@@ static __poll_t unix_dgram_poll(struct
mask = 0;
/* exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
@@@ -2655,7 -2638,7 +2655,7 @@@
mask |= EPOLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
@@@ -2865,7 -2848,7 +2865,7 @@@ static int __init af_unix_init(void
{
int rc = -1;
- BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_member(struct sk_buff, cb));
rc = proto_register(&unix_proto, 1);
if (rc != 0) {
diff --combined security/integrity/ima/ima_policy.c
index f19a895ad7cd,18bf8e2d4f95..936cba30d27c
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@@ -45,7 -45,7 +45,7 @@@
#define DONT_HASH 0x0200
#define INVALID_PCR(a) (((a) < 0) || \
- (a) >= (FIELD_SIZEOF(struct integrity_iint_cache, measured_pcrs) * 8))
+ (a) >= (sizeof_member(struct integrity_iint_cache, measured_pcrs) * 8))
int ima_policy_flag;
static int temp_ima_appraise;
@@@ -274,7 -274,7 +274,7 @@@ static struct ima_rule_entry *ima_lsm_c
* lsm rules can change
*/
memcpy(nentry, entry, sizeof(*nentry));
- memset(nentry->lsm, 0, FIELD_SIZEOF(struct ima_rule_entry, lsm));
+ memset(nentry->lsm, 0, sizeof_member(struct ima_rule_entry, lsm));
for (i = 0; i < MAX_LSM_RULES; i++) {
if (!entry->lsm[i].rule)
@@@ -765,8 -765,8 +765,8 @@@ enum
Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq,
Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt,
Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt,
- Opt_appraise_type, Opt_permit_directio,
- Opt_pcr, Opt_template, Opt_err
+ Opt_appraise_type, Opt_appraise_flag,
+ Opt_permit_directio, Opt_pcr, Opt_template, Opt_err
};
static const match_table_t policy_tokens = {
@@@ -798,7 -798,6 +798,7 @@@
{Opt_euid_lt, "euid<%s"},
{Opt_fowner_lt, "fowner<%s"},
{Opt_appraise_type, "appraise_type=%s"},
+ {Opt_appraise_flag, "appraise_flag=%s"},
{Opt_permit_directio, "permit_directio"},
{Opt_pcr, "pcr=%s"},
{Opt_template, "template=%s"},
@@@ -1173,11 -1172,6 +1173,11 @@@ static int ima_parse_rule(char *rule, s
else
result = -EINVAL;
break;
+ case Opt_appraise_flag:
+ ima_log_string(ab, "appraise_flag", args[0].from);
+ if (strstr(args[0].from, "blacklist"))
+ entry->flags |= IMA_CHECK_BLACKLIST;
+ break;
case Opt_permit_directio:
entry->flags |= IMA_PERMIT_DIRECTIO;
break;
@@@ -1506,8 -1500,6 +1506,8 @@@ int ima_policy_show(struct seq_file *m
else
seq_puts(m, "appraise_type=imasig ");
}
+ if (entry->flags & IMA_CHECK_BLACKLIST)
+ seq_puts(m, "appraise_flag=check_blacklist ");
if (entry->flags & IMA_PERMIT_DIRECTIO)
seq_puts(m, "permit_directio ");
rcu_read_unlock();
diff --combined sound/soc/codecs/hdmi-codec.c
index f8b5b960e597,c8b1ac22b2b2..841d1c608fe4
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@@ -274,7 -274,7 +274,7 @@@ struct hdmi_codec_priv
uint8_t eld[MAX_ELD_BYTES];
struct snd_pcm_chmap *chmap_info;
unsigned int chmap_idx;
- struct mutex lock;
+ unsigned long busy;
struct snd_soc_jack *jack;
unsigned int jack_status;
};
@@@ -292,7 -292,7 +292,7 @@@ static int hdmi_eld_ctl_info(struct snd
struct snd_ctl_elem_info *uinfo)
{
uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
- uinfo->count = FIELD_SIZEOF(struct hdmi_codec_priv, eld);
+ uinfo->count = sizeof_member(struct hdmi_codec_priv, eld);
return 0;
}
@@@ -390,8 -390,8 +390,8 @@@ static int hdmi_codec_startup(struct sn
struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
int ret = 0;
- ret = mutex_trylock(&hcp->lock);
- if (!ret) {
+ ret = test_and_set_bit(0, &hcp->busy);
+ if (ret) {
dev_err(dai->dev, "Only one simultaneous stream supported!\n");
return -EINVAL;
}
@@@ -419,7 -419,7 +419,7 @@@
err:
/* Release the exclusive lock on error */
- mutex_unlock(&hcp->lock);
+ clear_bit(0, &hcp->busy);
return ret;
}
@@@ -431,7 -431,7 +431,7 @@@ static void hdmi_codec_shutdown(struct
hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN;
hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data);
- mutex_unlock(&hcp->lock);
+ clear_bit(0, &hcp->busy);
}
static int hdmi_codec_hw_params(struct snd_pcm_substream *substream,
@@@ -811,6 -811,8 +811,6 @@@ static int hdmi_codec_probe(struct plat
return -ENOMEM;
hcp->hcd = *hcd;
- mutex_init(&hcp->lock);
-
daidrv = devm_kcalloc(dev, dai_count, sizeof(*daidrv), GFP_KERNEL);
if (!daidrv)
return -ENOMEM;
diff --combined virt/kvm/kvm_main.c
index 00268290dcbd,777d3b125072..74003e85f371
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@@ -50,7 -50,6 +50,7 @@@
#include <linux/bsearch.h>
#include <linux/io.h>
#include <linux/lockdep.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/ioctl.h>
@@@ -122,22 -121,9 +122,22 @@@ static long kvm_vcpu_compat_ioctl(struc
unsigned long arg);
#define KVM_COMPAT(c) .compat_ioctl = (c)
#else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ * passed to a compat task, let the ioctls fail.
+ */
static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+ return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
+ .open = kvm_no_compat_open
#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);
@@@ -163,30 -149,10 +163,30 @@@ __weak int kvm_arch_mmu_notifier_invali
return 0;
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@@ -659,28 -625,10 +659,28 @@@ static int kvm_create_vm_debugfs(struc
return 0;
}
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
- int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
+ int r = -ENOMEM;
+ int i;
if (!kvm)
return ERR_PTR(-ENOMEM);
@@@ -692,50 -640,45 +692,50 @@@
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
- refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
- r = kvm_arch_init_vm(kvm, type);
- if (r)
- goto out_err_no_disable;
-
- r = hardware_enable_all();
- if (r)
- goto out_err_no_disable;
-
-#ifdef CONFIG_HAVE_KVM_IRQFD
- INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
-#endif
-
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
- r = -ENOMEM;
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
+
+ refcount_set(&kvm->users_count, 1);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_memslots *slots = kvm_alloc_memslots();
+
if (!slots)
- goto out_err_no_srcu;
+ goto out_err_no_arch_destroy_vm;
/* Generations must be different for each address space. */
slots->generation = i;
rcu_assign_pointer(kvm->memslots[i], slots);
}
- if (init_srcu_struct(&kvm->srcu))
- goto out_err_no_srcu;
- if (init_srcu_struct(&kvm->irq_srcu))
- goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
rcu_assign_pointer(kvm->buses[i],
kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
if (!kvm->buses[i])
- goto out_err;
+ goto out_err_no_arch_destroy_vm;
}
+ r = kvm_arch_init_vm(kvm, type);
+ if (r)
+ goto out_err_no_arch_destroy_vm;
+
+ r = hardware_enable_all();
+ if (r)
+ goto out_err_no_disable;
+
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
+#endif
+
r = kvm_init_mmu_notifier(kvm);
+ if (r)
+ goto out_err_no_mmu_notifier;
+
+ r = kvm_arch_post_init_vm(kvm);
if (r)
goto out_err;
@@@ -748,24 -691,17 +748,24 @@@
return kvm;
out_err:
- cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
- cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ if (kvm->mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
hardware_disable_all();
out_err_no_disable:
- refcount_set(&kvm->users_count, 0);
+ kvm_arch_destroy_vm(kvm);
+out_err_no_arch_destroy_vm:
+ WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+ cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@@ -797,8 -733,6 +797,8 @@@ static void kvm_destroy_vm(struct kvm *
mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
mutex_unlock(&kvm_lock);
+ kvm_arch_pre_destroy_vm(kvm);
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@@ -838,18 -772,6 +838,18 @@@ void kvm_put_kvm(struct kvm *kvm
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);
+/*
+ * Used to put a reference that was taken on behalf of an object associated
+ * with a user-visible file descriptor, e.g. a vcpu or device, if installation
+ * of the new file descriptor fails and the reference cannot be transferred to
+ * its final owner. In such cases, the caller is still actively using @kvm and
+ * will fail miserably if the refcount unexpectedly hits zero.
+ */
+void kvm_put_kvm_no_destroy(struct kvm *kvm)
+{
+ WARN_ON(refcount_dec_and_test(&kvm->users_count));
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy);
static int kvm_vm_release(struct inode *inode, struct file *filp)
{
@@@ -1931,7 -1853,7 +1931,7 @@@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
SetPageDirty(page);
@@@ -1941,7 -1863,7 +1941,7 @@@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty)
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@@ -2438,23 -2360,20 +2438,23 @@@ out
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- if (!vcpu_valid_wakeup(vcpu))
- shrink_halt_poll_ns(vcpu);
- else if (halt_poll_ns) {
- if (block_ns <= vcpu->halt_poll_ns)
- ;
- /* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ if (!kvm_arch_no_poll(vcpu)) {
+ if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- /* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
- grow_halt_poll_ns(vcpu);
- } else
- vcpu->halt_poll_ns = 0;
+ } else if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ } else {
+ vcpu->halt_poll_ns = 0;
+ }
+ }
trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
kvm_arch_vcpu_block_finish(vcpu);
@@@ -2751,18 -2670,17 +2751,18 @@@ static int kvm_vm_ioctl_create_vcpu(str
goto unlock_vcpu_destroy;
}
- BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
+ vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
+ BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0) {
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
goto unlock_vcpu_destroy;
}
- kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+ kvm->vcpus[vcpu->vcpu_idx] = vcpu;
/*
* Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus
@@@ -3128,14 -3046,14 +3128,14 @@@ struct kvm_device *kvm_device_from_filp
return filp->private_data;
}
-static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+static const struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
#ifdef CONFIG_KVM_MPIC
[KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
[KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
#endif
};
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type)
{
if (type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENOSPC;
@@@ -3156,7 -3074,7 +3156,7 @@@ void kvm_unregister_device_ops(u32 type
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
- struct kvm_device_ops *ops = NULL;
+ const struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int type;
@@@ -3196,7 -3114,7 +3196,7 @@@
kvm_get_kvm(kvm);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);
@@@ -4354,12 -4272,12 +4354,12 @@@ int kvm_init(void *opaque, unsigned vcp
r = kvm_arch_hardware_setup();
if (r < 0)
- goto out_free_0a;
+ goto out_free_1;
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_processor_compat, &r, 1);
if (r < 0)
- goto out_free_1;
+ goto out_free_2;
}
r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
@@@ -4375,7 -4293,7 +4375,7 @@@
kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
SLAB_ACCOUNT,
offsetof(struct kvm_vcpu, arch),
- sizeof_field(struct kvm_vcpu, arch),
+ sizeof_member(struct kvm_vcpu, arch),
NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
@@@ -4416,8 -4334,9 +4416,8 @@@ out_free_3
unregister_reboot_notifier(&kvm_reboot_notifier);
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
out_free_2:
-out_free_1:
kvm_arch_hardware_unsetup();
-out_free_0a:
+out_free_1:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
kvm_irqfd_exit();
@@@ -4445,86 -4364,3 +4445,86 @@@ void kvm_exit(void
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+ struct kvm *kvm;
+ struct task_struct *parent;
+ struct completion init_done;
+ kvm_vm_thread_fn_t thread_fn;
+ uintptr_t data;
+ int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+ /*
+ * The init_context is allocated on the stack of the parent thread, so
+ * we have to locally copy anything that is needed beyond initialization
+ */
+ struct kvm_vm_worker_thread_context *init_context = context;
+ struct kvm *kvm = init_context->kvm;
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+ uintptr_t data = init_context->data;
+ int err;
+
+ err = kthread_park(current);
+ /* kthread_park(current) is never supposed to return an error */
+ WARN_ON(err != 0);
+ if (err)
+ goto init_complete;
+
+ err = cgroup_attach_task_all(init_context->parent, current);
+ if (err) {
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+ __func__, err);
+ goto init_complete;
+ }
+
+ set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+ init_context->err = err;
+ complete(&init_context->init_done);
+ init_context = NULL;
+
+ if (err)
+ return err;
+
+ /* Wait to be woken up by the spawner before proceeding. */
+ kthread_parkme();
+
+ if (!kthread_should_stop())
+ err = thread_fn(kvm, data);
+
+ return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr)
+{
+ struct kvm_vm_worker_thread_context init_context = {};
+ struct task_struct *thread;
+
+ *thread_ptr = NULL;
+ init_context.kvm = kvm;
+ init_context.parent = current;
+ init_context.thread_fn = thread_fn;
+ init_context.data = data;
+ init_completion(&init_context.init_done);
+
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
+ "%s-%d", name, task_pid_nr(current));
+ if (IS_ERR(thread))
+ return PTR_ERR(thread);
+
+ /* kthread_run is never supposed to return NULL */
+ WARN_ON(thread == NULL);
+
+ wait_for_completion(&init_context.init_done);
+
+ if (!init_context.err)
+ *thread_ptr = thread;
+
+ return init_context.err;
+}
--
LinuxNextTracking