The following commit has been merged in the master branch: commit ef2cc88e2a205b8a11a19e78db63a70d3728cdf5 Merge: 937d6eefc716a9071f0e3bada19200de1bb9d048 65309ef6b258f5a7b57c1033a82ba2aba5c434cc Author: Linus Torvalds torvalds@linux-foundation.org Date: Mon Dec 2 13:37:02 2019 -0800
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley: "This is mostly update of the usual drivers: aacraid, ufs, zfcp, NCR5380, lpfc, qla2xxx, smartpqi, hisi_sas, target, mpt3sas, pm80xx plus a whole load of minor updates and fixes.
The major core changes are Al Viro's reworking of sg's handling of copy to/from user, Ming Lei's removal of the host busy counter to avoid contention in the multiqueue case and Damien Le Moal's fixing of residual tracking across error handling"
* tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (251 commits) scsi: bnx2fc: timeout calculation invalid for bnx2fc_eh_abort() scsi: target: core: Fix a pr_debug() argument scsi: iscsi: Don't send data to unbound connection scsi: target: iscsi: Wait for all commands to finish before freeing a session scsi: target: core: Release SPC-2 reservations when closing a session scsi: target: core: Document target_cmd_size_check() scsi: bnx2i: fix potential use after free Revert "scsi: qla2xxx: Fix memory leak when sending I/O fails" scsi: NCR5380: Add disconnect_mask module parameter scsi: NCR5380: Unconditionally clear ICR after do_abort() scsi: NCR5380: Call scsi_set_resid() on command completion scsi: scsi_debug: num_tgts must be >= 0 scsi: lpfc: use hdwq assigned cpu for allocation scsi: arcmsr: fix indentation issues scsi: qla4xxx: fix double free bug scsi: pm80xx: Modified the logic to collect fatal dump scsi: pm80xx: Tie the interrupt name to the module instance scsi: pm80xx: Controller fatal error through sysfs scsi: pm80xx: Do not request 12G sas speeds scsi: pm80xx: Cleanup command when a reset times out ...
diff --combined drivers/scsi/cxlflash/main.c index 2dbf35f82787,4f01ef5c7f69..fbd2ae40dab4 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@@ -44,14 -44,12 +44,12 @@@ static void process_cmd_err(struct afu_ struct afu *afu = cmd->parent; struct cxlflash_cfg *cfg = afu->parent; struct device *dev = &cfg->dev->dev; - struct sisl_ioarcb *ioarcb; struct sisl_ioasa *ioasa; u32 resid;
if (unlikely(!cmd)) return;
- ioarcb = &(cmd->rcb); ioasa = &(cmd->sa);
if (ioasa->rc.flags & SISL_RC_FLAGS_UNDERRUN) { @@@ -3593,7 -3591,7 +3591,7 @@@ static const struct file_operations cxl .owner = THIS_MODULE, .open = cxlflash_chr_open, .unlocked_ioctl = cxlflash_chr_ioctl, - .compat_ioctl = cxlflash_chr_ioctl, + .compat_ioctl = compat_ptr_ioctl, };
/** diff --combined drivers/scsi/lpfc/lpfc_init.c index e8813d26e594,e9323889f199..dc6f7c4b54c6 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@@ -40,6 -40,8 +40,8 @@@ #include <linux/irq.h> #include <linux/bitops.h> #include <linux/crash_dump.h> + #include <linux/cpu.h> + #include <linux/cpuhotplug.h>
#include <scsi/scsi.h> #include <scsi/scsi_device.h> @@@ -66,9 -68,13 +68,13 @@@ #include "lpfc_version.h" #include "lpfc_ids.h"
+ static enum cpuhp_state lpfc_cpuhp_state; /* Used when mapping IRQ vectors in a driver centric manner */ static uint32_t lpfc_present_cpu;
+ static void __lpfc_cpuhp_remove(struct lpfc_hba *phba); + static void lpfc_cpuhp_remove(struct lpfc_hba *phba); + static void lpfc_cpuhp_add(struct lpfc_hba *phba); static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); static int lpfc_post_rcv_buf(struct lpfc_hba *); static int lpfc_sli4_queue_verify(struct lpfc_hba *); @@@ -1235,10 -1241,9 +1241,9 @@@ lpfc_hb_eq_delay_work(struct work_struc struct lpfc_hba, eq_delay_work); struct lpfc_eq_intr_info *eqi, *eqi_new; struct lpfc_queue *eq, *eq_next; - unsigned char *eqcnt = NULL; + unsigned char *ena_delay = NULL; uint32_t usdelay; int i; - bool update = false;
if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING) return; @@@ -1247,44 -1252,36 +1252,36 @@@ phba->pport->fc_flag & FC_OFFLINE_MODE) goto requeue;
- eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char), - GFP_KERNEL); - if (!eqcnt) + ena_delay = kcalloc(phba->sli4_hba.num_possible_cpu, sizeof(*ena_delay), + GFP_KERNEL); + if (!ena_delay) goto requeue;
- if (phba->cfg_irq_chann > 1) { - /* Loop thru all IRQ vectors */ - for (i = 0; i < phba->cfg_irq_chann; i++) { - /* Get the EQ corresponding to the IRQ vector */ - eq = phba->sli4_hba.hba_eq_hdl[i].eq; - if (!eq) - continue; - if (eq->q_mode) { - update = true; - break; - } - if (eqcnt[eq->last_cpu] < 2) - eqcnt[eq->last_cpu]++; + for (i = 0; i < phba->cfg_irq_chann; i++) { + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[i].eq; + if (!eq) + continue; + if (eq->q_mode || eq->q_flag & HBA_EQ_DELAY_CHK) { + eq->q_flag &= ~HBA_EQ_DELAY_CHK; + ena_delay[eq->last_cpu] = 1; } - } else - update = true; + }
for_each_present_cpu(i) { eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i); - if (!update && eqcnt[i] < 2) { - eqi->icnt = 0; - continue; + if (ena_delay[i]) { + usdelay = (eqi->icnt >> 10) * LPFC_EQ_DELAY_STEP; + if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) + usdelay = LPFC_MAX_AUTO_EQ_DELAY; + } else { + usdelay = 0; }
- usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) * - LPFC_EQ_DELAY_STEP; - if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) - usdelay = LPFC_MAX_AUTO_EQ_DELAY; - eqi->icnt = 0;
list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) { - if (eq->last_cpu != i) { + if (unlikely(eq->last_cpu != i)) { eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info, eq->last_cpu); list_move_tail(&eq->cpu_list, &eqi_new->list); @@@ -1296,7 -1293,7 +1293,7 @@@ } }
- kfree(eqcnt); + kfree(ena_delay);
requeue: queue_delayed_work(phba->wq, &phba->eq_delay_work, @@@ -3053,11 -3050,12 +3050,12 @@@ lpfc_sli4_node_prep(struct lpfc_hba *ph continue; } ndlp->nlp_rpi = rpi; - lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x x%px\n", ndlp->nlp_rpi, - ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0009 Assign RPI x%x to ndlp x%px " + "DID:x%06x flg:x%x map:x%x\n", + ndlp->nlp_rpi, ndlp, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_usg_map); } } lpfc_destroy_vport_work_array(phba, vports); @@@ -3387,6 -3385,8 +3385,8 @@@ lpfc_online(struct lpfc_hba *phba if (phba->cfg_xri_rebalancing) lpfc_create_multixri_pools(phba);
+ lpfc_cpuhp_add(phba); + lpfc_unblock_mgmt_io(phba); return 0; } @@@ -3453,10 -3453,15 +3453,15 @@@ lpfc_offline_prep(struct lpfc_hba *phba list_for_each_entry_safe(ndlp, next_ndlp, &vports[i]->fc_nodes, nlp_listp) { - if (!NLP_CHK_NODE_ACT(ndlp)) - continue; - if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + if ((!NLP_CHK_NODE_ACT(ndlp)) || + ndlp->nlp_state == NLP_STE_UNUSED_NODE) { + /* Driver must assume RPI is invalid for + * any unused or inactive node. + */ + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; continue; + } + if (ndlp->nlp_type & NLP_FABRIC) { lpfc_disc_state_machine(vports[i], ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); @@@ -3472,16 -3477,16 +3477,16 @@@ * comes back online. */ if (phba->sli_rev == LPFC_SLI_REV4) { - lpfc_printf_vlog(ndlp->vport, - KERN_INFO, LOG_NODE, - "0011 lpfc_offline: " - "ndlp:x%px did %x " - "usgmap:x%x rpi:%x\n", - ndlp, ndlp->nlp_DID, - ndlp->nlp_usg_map, - ndlp->nlp_rpi); - + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0011 Free RPI x%x on " + "ndlp:x%px did x%x " + "usgmap:x%x\n", + ndlp->nlp_rpi, ndlp, + ndlp->nlp_DID, + ndlp->nlp_usg_map); lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi); + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; } lpfc_unreg_rpi(vports[i], ndlp); } @@@ -3545,6 -3550,7 +3550,7 @@@ lpfc_offline(struct lpfc_hba *phba spin_unlock_irq(shost->host_lock); } lpfc_destroy_vport_work_array(phba, vports); + __lpfc_cpuhp_remove(phba);
if (phba->cfg_xri_rebalancing) lpfc_destroy_multixri_pools(phba); @@@ -5283,10 -5289,10 +5289,10 @@@ lpfc_sli4_async_sli_evt(struct lpfc_hb evt_type = bf_get(lpfc_trailer_type, acqe_sli);
lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "2901 Async SLI event - Event Data1:x%08x Event Data2:" - "x%08x SLI Event Type:%d\n", + "2901 Async SLI event - Type:%d, Event Data: x%08x " + "x%08x x%08x x%08x\n", evt_type, acqe_sli->event_data1, acqe_sli->event_data2, - evt_type); + acqe_sli->reserved, acqe_sli->trailer);
port_name = phba->Port[0]; if (port_name == 0x00) @@@ -5433,11 -5439,26 +5439,26 @@@ "Event Data1:x%08x Event Data2: x%08x\n", acqe_sli->event_data1, acqe_sli->event_data2); break; + case LPFC_SLI_EVENT_TYPE_MISCONF_FAWWN: + /* Misconfigured WWN. Reports that the SLI Port is configured + * to use FA-WWN, but the attached device doesn’t support it. + * No driver action is required. + * Event Data1 - N.A, Event Data2 - N.A + */ + lpfc_log_msg(phba, KERN_WARNING, LOG_SLI, + "2699 Misconfigured FA-WWN - Attached device does " + "not support FA-WWN\n"); + break; + case LPFC_SLI_EVENT_TYPE_EEPROM_FAILURE: + /* EEPROM failure. No driver action is required */ + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2518 EEPROM failure - " + "Event Data1: x%08x Event Data2: x%08x\n", + acqe_sli->event_data1, acqe_sli->event_data2); + break; default: lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "3193 Async SLI event - Event Data1:x%08x Event Data2:" - "x%08x SLI Event Type:%d\n", - acqe_sli->event_data1, acqe_sli->event_data2, + "3193 Unrecognized SLI event, type: 0x%x", evt_type); break; } @@@ -5975,6 -5996,29 +5996,29 @@@ static void lpfc_log_intr_mode(struct l return; }
+ /** + * lpfc_cpumask_of_node_init - initalizes cpumask of phba's NUMA node + * @phba: Pointer to HBA context object. + * + **/ + static void + lpfc_cpumask_of_node_init(struct lpfc_hba *phba) + { + unsigned int cpu, numa_node; + struct cpumask *numa_mask = &phba->sli4_hba.numa_mask; + + cpumask_clear(numa_mask); + + /* Check if we're a NUMA architecture */ + numa_node = dev_to_node(&phba->pcidev->dev); + if (numa_node == NUMA_NO_NODE) + return; + + for_each_possible_cpu(cpu) + if (cpu_to_node(cpu) == numa_node) + cpumask_set_cpu(cpu, numa_mask); + } + /** * lpfc_enable_pci_dev - Enable a generic PCI device. * @phba: pointer to lpfc hba data structure. @@@ -6418,6 -6462,7 +6462,7 @@@ lpfc_sli4_driver_resource_setup(struct phba->sli4_hba.num_present_cpu = lpfc_present_cpu; phba->sli4_hba.num_possible_cpu = num_possible_cpus(); phba->sli4_hba.curr_disp_cpu = 0; + lpfc_cpumask_of_node_init(phba);
/* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); @@@ -6953,6 -6998,7 +6998,7 @@@ lpfc_sli4_driver_resource_unset(struct phba->sli4_hba.num_possible_cpu = 0; phba->sli4_hba.num_present_cpu = 0; phba->sli4_hba.curr_disp_cpu = 0; + cpumask_clear(&phba->sli4_hba.numa_mask);
/* Free memory allocated for fast-path work queue handles */ kfree(phba->sli4_hba.hba_eq_hdl); @@@ -7126,7 -7172,7 +7172,7 @@@ lpfc_init_iocb_list(struct lpfc_hba *ph if (iocbq_entry == NULL) { printk(KERN_ERR "%s: only allocated %d iocbs of " "expected %d count. Unloading driver.\n", - __func__, i, LPFC_IOCB_LIST_CNT); + __func__, i, iocb_count); goto out_free_iocbq; }
@@@ -7545,18 -7591,10 +7591,10 @@@ lpfc_create_shost(struct lpfc_hba *phba
if (phba->nvmet_support) { /* Only 1 vport (pport) will support NVME target */ - if (phba->txrdy_payload_pool == NULL) { - phba->txrdy_payload_pool = dma_pool_create( - "txrdy_pool", &phba->pcidev->dev, - TXRDY_PAYLOAD_LEN, 16, 0); - if (phba->txrdy_payload_pool) { - phba->targetport = NULL; - phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; - lpfc_printf_log(phba, KERN_INFO, - LOG_INIT | LOG_NVME_DISC, - "6076 NVME Target Found\n"); - } - } + phba->targetport = NULL; + phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME_DISC, + "6076 NVME Target Found\n"); }
lpfc_debugfs_initialize(vport); @@@ -8235,6 -8273,94 +8273,94 @@@ lpfc_destroy_bootstrap_mbox(struct lpfc memset(&phba->sli4_hba.bmbx, 0, sizeof(struct lpfc_bmbx)); }
+ static const char * const lpfc_topo_to_str[] = { + "Loop then P2P", + "Loopback", + "P2P Only", + "Unsupported", + "Loop Only", + "Unsupported", + "P2P then Loop", + }; + + /** + * lpfc_map_topology - Map the topology read from READ_CONFIG + * @phba: pointer to lpfc hba data structure. + * @rdconf: pointer to read config data + * + * This routine is invoked to map the topology values as read + * from the read config mailbox command. If the persistent + * topology feature is supported, the firmware will provide the + * saved topology information to be used in INIT_LINK + * + **/ + #define LINK_FLAGS_DEF 0x0 + #define LINK_FLAGS_P2P 0x1 + #define LINK_FLAGS_LOOP 0x2 + static void + lpfc_map_topology(struct lpfc_hba *phba, struct lpfc_mbx_read_config *rd_config) + { + u8 ptv, tf, pt; + + ptv = bf_get(lpfc_mbx_rd_conf_ptv, rd_config); + tf = bf_get(lpfc_mbx_rd_conf_tf, rd_config); + pt = bf_get(lpfc_mbx_rd_conf_pt, rd_config); + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "2027 Read Config Data : ptv:0x%x, tf:0x%x pt:0x%x", + ptv, tf, pt); + if (!ptv) { + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2019 FW does not support persistent topology " + "Using driver parameter defined value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + return; + } + /* FW supports persistent topology - override module parameter value */ + phba->hba_flag |= HBA_PERSISTENT_TOPO; + switch (phba->pcidev->device) { + case PCI_DEVICE_ID_LANCER_G7_FC: + if (tf || (pt == LINK_FLAGS_LOOP)) { + /* Invalid values from FW - use driver params */ + phba->hba_flag &= ~HBA_PERSISTENT_TOPO; + } else { + /* Prism only supports PT2PT topology */ + phba->cfg_topology = FLAGS_TOPOLOGY_MODE_PT_PT; + } + break; + case PCI_DEVICE_ID_LANCER_G6_FC: + if (!tf) { + phba->cfg_topology = ((pt == LINK_FLAGS_LOOP) + ? FLAGS_TOPOLOGY_MODE_LOOP + : FLAGS_TOPOLOGY_MODE_PT_PT); + } else { + phba->hba_flag &= ~HBA_PERSISTENT_TOPO; + } + break; + default: /* G5 */ + if (tf) { + /* If topology failover set - pt is '0' or '1' */ + phba->cfg_topology = (pt ? FLAGS_TOPOLOGY_MODE_PT_LOOP : + FLAGS_TOPOLOGY_MODE_LOOP_PT); + } else { + phba->cfg_topology = ((pt == LINK_FLAGS_P2P) + ? FLAGS_TOPOLOGY_MODE_PT_PT + : FLAGS_TOPOLOGY_MODE_LOOP); + } + break; + } + if (phba->hba_flag & HBA_PERSISTENT_TOPO) { + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "2020 Using persistent topology value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2021 Invalid topology values from FW " + "Using driver parameter defined value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + } + } + /** * lpfc_sli4_read_config - Get the config parameters. * @phba: pointer to lpfc hba data structure. @@@ -8346,6 -8472,7 +8472,7 @@@ lpfc_sli4_read_config(struct lpfc_hba * phba->max_vpi = (phba->sli4_hba.max_cfg_param.max_vpi > 0) ? (phba->sli4_hba.max_cfg_param.max_vpi - 1) : 0; phba->max_vports = phba->max_vpi; + lpfc_map_topology(phba, rd_config); lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "2003 cfg params Extents? %d " "XRI(B:%d M:%d), " @@@ -8619,8 -8746,8 +8746,8 @@@ lpfc_sli4_queue_verify(struct lpfc_hba */
if (phba->nvmet_support) { - if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq) - phba->cfg_nvmet_mrq = phba->cfg_irq_chann; + if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq) + phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX) phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX; } @@@ -9055,6 -9182,7 +9182,6 @@@ lpfc_sli4_queue_create(struct lpfc_hba } }
-#if defined(BUILD_NVME) /* Clear NVME stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { @@@ -9062,6 -9190,7 +9189,6 @@@ sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat)); } } -#endif
/* Clear SCSI stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { @@@ -9160,6 -9289,8 +9287,8 @@@ lpfc_sli4_queue_destroy(struct lpfc_hb } spin_unlock_irq(&phba->hbalock);
+ lpfc_sli4_cleanup_poll_list(phba); + /* Release HBA eqs */ if (phba->sli4_hba.hdwq) lpfc_sli4_release_hdwq(phba); @@@ -10581,7 -10712,6 +10710,6 @@@ lpfc_find_cpu_handle(struct lpfc_hba *p */ if ((match == LPFC_FIND_BY_EQ) && (cpup->flag & LPFC_CPU_FIRST_IRQ) && - (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && (cpup->eq == id)) return cpu;
@@@ -10619,6 -10749,75 +10747,75 @@@ lpfc_find_hyper(struct lpfc_hba *phba, } #endif
+ /* + * lpfc_assign_eq_map_info - Assigns eq for vector_map structure + * @phba: pointer to lpfc hba data structure. + * @eqidx: index for eq and irq vector + * @flag: flags to set for vector_map structure + * @cpu: cpu used to index vector_map structure + * + * The routine assigns eq info into vector_map structure + */ + static inline void + lpfc_assign_eq_map_info(struct lpfc_hba *phba, uint16_t eqidx, uint16_t flag, + unsigned int cpu) + { + struct lpfc_vector_map_info *cpup = &phba->sli4_hba.cpu_map[cpu]; + struct lpfc_hba_eq_hdl *eqhdl = lpfc_get_eq_hdl(eqidx); + + cpup->eq = eqidx; + cpup->flag |= flag; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3336 Set Affinity: CPU %d irq %d eq %d flag x%x\n", + cpu, eqhdl->irq, cpup->eq, cpup->flag); + } + + /** + * lpfc_cpu_map_array_init - Initialize cpu_map structure + * @phba: pointer to lpfc hba data structure. + * + * The routine initializes the cpu_map array structure + */ + static void + lpfc_cpu_map_array_init(struct lpfc_hba *phba) + { + struct lpfc_vector_map_info *cpup; + struct lpfc_eq_intr_info *eqi; + int cpu; + + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; + cpup->core_id = LPFC_VECTOR_MAP_EMPTY; + cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; + cpup->eq = LPFC_VECTOR_MAP_EMPTY; + cpup->flag = 0; + eqi = per_cpu_ptr(phba->sli4_hba.eq_info, cpu); + INIT_LIST_HEAD(&eqi->list); + eqi->icnt = 0; + } + } + + /** + * lpfc_hba_eq_hdl_array_init - Initialize hba_eq_hdl structure + * @phba: pointer to lpfc hba data structure. + * + * The routine initializes the hba_eq_hdl array structure + */ + static void + lpfc_hba_eq_hdl_array_init(struct lpfc_hba *phba) + { + struct lpfc_hba_eq_hdl *eqhdl; + int i; + + for (i = 0; i < phba->cfg_irq_chann; i++) { + eqhdl = lpfc_get_eq_hdl(i); + eqhdl->irq = LPFC_VECTOR_MAP_EMPTY; + eqhdl->phba = phba; + } + } + /** * lpfc_cpu_affinity_check - Check vector CPU affinity mappings * @phba: pointer to lpfc hba data structure. @@@ -10637,22 -10836,10 +10834,10 @@@ lpfc_cpu_affinity_check(struct lpfc_hb int max_core_id, min_core_id; struct lpfc_vector_map_info *cpup; struct lpfc_vector_map_info *new_cpup; - const struct cpumask *maskp; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif
- /* Init cpu_map array */ - for_each_possible_cpu(cpu) { - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; - cpup->core_id = LPFC_VECTOR_MAP_EMPTY; - cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; - cpup->eq = LPFC_VECTOR_MAP_EMPTY; - cpup->irq = LPFC_VECTOR_MAP_EMPTY; - cpup->flag = 0; - } - max_phys_id = 0; min_phys_id = LPFC_VECTOR_MAP_EMPTY; max_core_id = 0; @@@ -10688,65 -10875,6 +10873,6 @@@ min_core_id = cpup->core_id; }
- for_each_possible_cpu(i) { - struct lpfc_eq_intr_info *eqi = - per_cpu_ptr(phba->sli4_hba.eq_info, i); - - INIT_LIST_HEAD(&eqi->list); - eqi->icnt = 0; - } - - /* This loop sets up all CPUs that are affinitized with a - * irq vector assigned to the driver. All affinitized CPUs - * will get a link to that vectors IRQ and EQ. - * - * NULL affinity mask handling: - * If irq count is greater than one, log an error message. - * If the null mask is received for the first irq, find the - * first present cpu, and assign the eq index to ensure at - * least one EQ is assigned. - */ - for (idx = 0; idx < phba->cfg_irq_chann; idx++) { - /* Get a CPU mask for all CPUs affinitized to this vector */ - maskp = pci_irq_get_affinity(phba->pcidev, idx); - if (!maskp) { - if (phba->cfg_irq_chann > 1) - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3329 No affinity mask found " - "for vector %d (%d)\n", - idx, phba->cfg_irq_chann); - if (!idx) { - cpu = cpumask_first(cpu_present_mask); - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->eq = idx; - cpup->irq = pci_irq_vector(phba->pcidev, idx); - cpup->flag |= LPFC_CPU_FIRST_IRQ; - } - break; - } - - i = 0; - /* Loop through all CPUs associated with vector idx */ - for_each_cpu_and(cpu, maskp, cpu_present_mask) { - /* Set the EQ index and IRQ for that vector */ - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->eq = idx; - cpup->irq = pci_irq_vector(phba->pcidev, idx); - - /* If this is the first CPU thats assigned to this - * vector, set LPFC_CPU_FIRST_IRQ. - */ - if (!i) - cpup->flag |= LPFC_CPU_FIRST_IRQ; - i++; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3336 Set Affinity: CPU %d " - "irq %d eq %d flag x%x\n", - cpu, cpup->irq, cpup->eq, cpup->flag); - } - } - /* After looking at each irq vector assigned to this pcidev, its * possible to see that not ALL CPUs have been accounted for. * Next we will set any unassigned (unaffinitized) cpu map @@@ -10772,7 -10900,7 +10898,7 @@@ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && - (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->eq != LPFC_VECTOR_MAP_EMPTY) && (new_cpup->phys_id == cpup->phys_id)) goto found_same; new_cpu = cpumask_next( @@@ -10785,7 -10913,6 +10911,6 @@@ found_same: /* We found a matching phys_id, so copy the IRQ info */ cpup->eq = new_cpup->eq; - cpup->irq = new_cpup->irq;
/* Bump start_cpu to the next slot to minmize the * chance of having multiple unassigned CPU entries @@@ -10797,9 -10924,10 +10922,10 @@@
lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3337 Set Affinity: CPU %d " - "irq %d from id %d same " + "eq %d from peer cpu %d same " "phys_id (%d)\n", - cpu, cpup->irq, new_cpu, cpup->phys_id); + cpu, cpup->eq, new_cpu, + cpup->phys_id); } }
@@@ -10823,7 -10951,7 +10949,7 @@@ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && - (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY)) + (new_cpup->eq != LPFC_VECTOR_MAP_EMPTY)) goto found_any; new_cpu = cpumask_next( new_cpu, cpu_present_mask); @@@ -10833,13 -10961,12 +10959,12 @@@ /* We should never leave an entry unassigned */ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3339 Set Affinity: CPU %d " - "irq %d UNASSIGNED\n", - cpup->hdwq, cpup->irq); + "eq %d UNASSIGNED\n", + cpup->hdwq, cpup->eq); continue; found_any: /* We found an available entry, copy the IRQ info */ cpup->eq = new_cpup->eq; - cpup->irq = new_cpup->irq;
/* Bump start_cpu to the next slot to minmize the * chance of having multiple unassigned CPU entries @@@ -10851,8 -10978,8 +10976,8 @@@
lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3338 Set Affinity: CPU %d " - "irq %d from id %d (%d/%d)\n", - cpu, cpup->irq, new_cpu, + "eq %d from peer cpu %d (%d/%d)\n", + cpu, cpup->eq, new_cpu, new_cpup->phys_id, new_cpup->core_id); } } @@@ -10873,11 -11000,11 +10998,11 @@@ idx++; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3333 Set Affinity: CPU %d (phys %d core %d): " - "hdwq %d eq %d irq %d flg x%x\n", + "hdwq %d eq %d flg x%x\n", cpu, cpup->phys_id, cpup->core_id, - cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + cpup->hdwq, cpup->eq, cpup->flag); } - /* Finally we need to associate a hdwq with each cpu_map entry + /* Associate a hdwq with each cpu_map entry * This will be 1 to 1 - hdwq to cpu, unless there are less * hardware queues then CPUs. For that case we will just round-robin * the available hardware queues as they get assigned to CPUs. @@@ -10951,9 -11078,26 +11076,26 @@@ logit: lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3335 Set Affinity: CPU %d (phys %d core %d): " - "hdwq %d eq %d irq %d flg x%x\n", + "hdwq %d eq %d flg x%x\n", cpu, cpup->phys_id, cpup->core_id, - cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + cpup->hdwq, cpup->eq, cpup->flag); + } + + /* + * Initialize the cpu_map slots for not-present cpus in case + * a cpu is hot-added. Perform a simple hdwq round robin assignment. + */ + idx = 0; + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) + continue; + + cpup->hdwq = idx++ % phba->cfg_hdw_queue; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3340 Set Affinity: not present " + "CPU %d hdwq %d\n", + cpu, cpup->hdwq); }
/* The cpu_map array will be used later during initialization @@@ -10962,12 -11106,281 +11104,281 @@@ return; }
+ /** + * lpfc_cpuhp_get_eq + * + * @phba: pointer to lpfc hba data structure. + * @cpu: cpu going offline + * @eqlist: + */ + static void + lpfc_cpuhp_get_eq(struct lpfc_hba *phba, unsigned int cpu, + struct list_head *eqlist) + { + const struct cpumask *maskp; + struct lpfc_queue *eq; + cpumask_t tmp; + u16 idx; + + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + maskp = pci_irq_get_affinity(phba->pcidev, idx); + if (!maskp) + continue; + /* + * if irq is not affinitized to the cpu going + * then we don't need to poll the eq attached + * to it. + */ + if (!cpumask_and(&tmp, maskp, cpumask_of(cpu))) + continue; + /* get the cpus that are online and are affini- + * tized to this irq vector. If the count is + * more than 1 then cpuhp is not going to shut- + * down this vector. Since this cpu has not + * gone offline yet, we need >1. + */ + cpumask_and(&tmp, maskp, cpu_online_mask); + if (cpumask_weight(&tmp) > 1) + continue; + + /* Now that we have an irq to shutdown, get the eq + * mapped to this irq. Note: multiple hdwq's in + * the software can share an eq, but eventually + * only eq will be mapped to this vector + */ + eq = phba->sli4_hba.hba_eq_hdl[idx].eq; + list_add(&eq->_poll_list, eqlist); + } + } + + static void __lpfc_cpuhp_remove(struct lpfc_hba *phba) + { + if (phba->sli_rev != LPFC_SLI_REV4) + return; + + cpuhp_state_remove_instance_nocalls(lpfc_cpuhp_state, + &phba->cpuhp); + /* + * unregistering the instance doesn't stop the polling + * timer. Wait for the poll timer to retire. + */ + synchronize_rcu(); + del_timer_sync(&phba->cpuhp_poll_timer); + } + + static void lpfc_cpuhp_remove(struct lpfc_hba *phba) + { + if (phba->pport->fc_flag & FC_OFFLINE_MODE) + return; + + __lpfc_cpuhp_remove(phba); + } + + static void lpfc_cpuhp_add(struct lpfc_hba *phba) + { + if (phba->sli_rev != LPFC_SLI_REV4) + return; + + rcu_read_lock(); + + if (!list_empty(&phba->poll_list)) { + timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + } + + rcu_read_unlock(); + + cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, + &phba->cpuhp); + } + + static int __lpfc_cpuhp_checks(struct lpfc_hba *phba, int *retval) + { + if (phba->pport->load_flag & FC_UNLOADING) { + *retval = -EAGAIN; + return true; + } + + if (phba->sli_rev != LPFC_SLI_REV4) { + *retval = 0; + return true; + } + + /* proceed with the hotplug */ + return false; + } + + /** + * lpfc_irq_set_aff - set IRQ affinity + * @eqhdl: EQ handle + * @cpu: cpu to set affinity + * + **/ + static inline void + lpfc_irq_set_aff(struct lpfc_hba_eq_hdl *eqhdl, unsigned int cpu) + { + cpumask_clear(&eqhdl->aff_mask); + cpumask_set_cpu(cpu, &eqhdl->aff_mask); + irq_set_status_flags(eqhdl->irq, IRQ_NO_BALANCING); + irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); + } + + /** + * lpfc_irq_clear_aff - clear IRQ affinity + * @eqhdl: EQ handle + * + **/ + static inline void + lpfc_irq_clear_aff(struct lpfc_hba_eq_hdl *eqhdl) + { + cpumask_clear(&eqhdl->aff_mask); + irq_clear_status_flags(eqhdl->irq, IRQ_NO_BALANCING); + irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); + } + + /** + * lpfc_irq_rebalance - rebalances IRQ affinity according to cpuhp event + * @phba: pointer to HBA context object. + * @cpu: cpu going offline/online + * @offline: true, cpu is going offline. false, cpu is coming online. + * + * If cpu is going offline, we'll try our best effort to find the next + * online cpu on the phba's NUMA node and migrate all offlining IRQ affinities. + * + * If cpu is coming online, reaffinitize the IRQ back to the onlineng cpu. + * + * Note: Call only if cfg_irq_numa is enabled, otherwise rely on + * PCI_IRQ_AFFINITY to auto-manage IRQ affinity. + * + **/ + static void + lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline) + { + struct lpfc_vector_map_info *cpup; + struct cpumask *aff_mask; + unsigned int cpu_select, cpu_next, idx; + const struct cpumask *numa_mask; + + if (!phba->cfg_irq_numa) + return; + + numa_mask = &phba->sli4_hba.numa_mask; + + if (!cpumask_test_cpu(cpu, numa_mask)) + return; + + cpup = &phba->sli4_hba.cpu_map[cpu]; + + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) + return; + + if (offline) { + /* Find next online CPU on NUMA node */ + cpu_next = cpumask_next_wrap(cpu, numa_mask, cpu, true); + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu_next); + + /* Found a valid CPU */ + if ((cpu_select < nr_cpu_ids) && (cpu_select != cpu)) { + /* Go through each eqhdl and ensure offlining + * cpu aff_mask is migrated + */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + aff_mask = lpfc_get_aff_mask(idx); + + /* Migrate affinity */ + if (cpumask_test_cpu(cpu, aff_mask)) + lpfc_irq_set_aff(lpfc_get_eq_hdl(idx), + cpu_select); + } + } else { + /* Rely on irqbalance if no online CPUs left on NUMA */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) + lpfc_irq_clear_aff(lpfc_get_eq_hdl(idx)); + } + } else { + /* Migrate affinity back to this CPU */ + lpfc_irq_set_aff(lpfc_get_eq_hdl(cpup->eq), cpu); + } + } + + static int lpfc_cpu_offline(unsigned int cpu, struct hlist_node *node) + { + struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp); + struct lpfc_queue *eq, *next; + LIST_HEAD(eqlist); + int retval; + + if (!phba) { + WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id()); + return 0; + } + + if (__lpfc_cpuhp_checks(phba, &retval)) + return retval; + + lpfc_irq_rebalance(phba, cpu, true); + + lpfc_cpuhp_get_eq(phba, cpu, &eqlist); + + /* start polling on these eq's */ + list_for_each_entry_safe(eq, next, &eqlist, _poll_list) { + list_del_init(&eq->_poll_list); + lpfc_sli4_start_polling(eq); + } + + return 0; + } + + static int lpfc_cpu_online(unsigned int cpu, struct hlist_node *node) + { + struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp); + struct lpfc_queue *eq, *next; + unsigned int n; + int retval; + + if (!phba) { + WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id()); + return 0; + } + + if (__lpfc_cpuhp_checks(phba, &retval)) + return retval; + + lpfc_irq_rebalance(phba, cpu, false); + + list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list) { + n = lpfc_find_cpu_handle(phba, eq->hdwq, LPFC_FIND_BY_HDWQ); + if (n == cpu) + lpfc_sli4_stop_polling(eq); + } + + return 0; + } + /** * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-4 interface spec. + * with SLI-4 interface spec. It also allocates MSI-X vectors and maps them + * to cpus on the system. + * + * When cfg_irq_numa is enabled, the adapter will only allocate vectors for + * the number of cpus on the same numa node as this adapter. The vectors are + * allocated without requesting OS affinity mapping. A vector will be + * allocated and assigned to each online and offline cpu. If the cpu is + * online, then affinity will be set to that cpu. If the cpu is offline, then + * affinity will be set to the nearest peer cpu within the numa node that is + * online. If there are no online cpus within the numa node, affinity is not + * assigned and the OS may do as it pleases. Note: cpu vector affinity mapping + * is consistent with the way cpu online/offline is handled when cfg_irq_numa is + * configured. + * + * If numa mode is not enabled and there is more than 1 vector allocated, then + * the driver relies on the managed irq interface where the OS assigns vector to + * cpu affinity. The driver will then use that affinity mapping to setup its + * cpu mapping table. * * Return codes * 0 - successful @@@ -10978,13 -11391,31 +11389,31 @@@ lpfc_sli4_enable_msix(struct lpfc_hba * { int vectors, rc, index; char *name; + const struct cpumask *numa_mask = NULL; + unsigned int cpu = 0, cpu_cnt = 0, cpu_select = nr_cpu_ids; + struct lpfc_hba_eq_hdl *eqhdl; + const struct cpumask *maskp; + bool first; + unsigned int flags = PCI_IRQ_MSIX;
/* Set up MSI-X multi-message vectors */ vectors = phba->cfg_irq_chann;
- rc = pci_alloc_irq_vectors(phba->pcidev, - 1, - vectors, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + if (phba->cfg_irq_numa) { + numa_mask = &phba->sli4_hba.numa_mask; + cpu_cnt = cpumask_weight(numa_mask); + vectors = min(phba->cfg_irq_chann, cpu_cnt); + + /* cpu: iterates over numa_mask including offline or online + * cpu_select: iterates over online numa_mask to set affinity + */ + cpu = cpumask_first(numa_mask); + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + } else { + flags |= PCI_IRQ_AFFINITY; + } + + rc = pci_alloc_irq_vectors(phba->pcidev, 1, vectors, flags); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@@ -10994,23 -11425,61 +11423,61 @@@
/* Assign MSI-X vectors to interrupt handlers */ for (index = 0; index < vectors; index++) { - name = phba->sli4_hba.hba_eq_hdl[index].handler_name; + eqhdl = lpfc_get_eq_hdl(index); + name = eqhdl->handler_name; memset(name, 0, LPFC_SLI4_HANDLER_NAME_SZ); snprintf(name, LPFC_SLI4_HANDLER_NAME_SZ, LPFC_DRIVER_HANDLER_NAME"%d", index);
- phba->sli4_hba.hba_eq_hdl[index].idx = index; - phba->sli4_hba.hba_eq_hdl[index].phba = phba; + eqhdl->idx = index; rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, - name, - &phba->sli4_hba.hba_eq_hdl[index]); + name, eqhdl); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " "request_irq failed (%d)\n", index, rc); goto cfg_fail_out; } + + eqhdl->irq = pci_irq_vector(phba->pcidev, index); + + if (phba->cfg_irq_numa) { + /* If found a neighboring online cpu, set affinity */ + if (cpu_select < nr_cpu_ids) + lpfc_irq_set_aff(eqhdl, cpu_select); + + /* Assign EQ to cpu_map */ + lpfc_assign_eq_map_info(phba, index, + LPFC_CPU_FIRST_IRQ, + cpu); + + /* Iterate to next offline or online cpu in numa_mask */ + cpu = cpumask_next(cpu, numa_mask); + + /* Find next online cpu in numa_mask to set affinity */ + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + } else if (vectors == 1) { + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, index, LPFC_CPU_FIRST_IRQ, + cpu); + } else { + maskp = pci_irq_get_affinity(phba->pcidev, index); + + first = true; + /* Loop through all CPUs associated with vector index */ + for_each_cpu_and(cpu, maskp, cpu_present_mask) { + /* If this is the first CPU thats assigned to + * this vector, set LPFC_CPU_FIRST_IRQ. + */ + lpfc_assign_eq_map_info(phba, index, + first ? + LPFC_CPU_FIRST_IRQ : 0, + cpu); + if (first) + first = false; + } + } }
if (vectors != phba->cfg_irq_chann) { @@@ -11020,17 -11489,18 +11487,18 @@@ phba->cfg_irq_chann, vectors); if (phba->cfg_irq_chann > vectors) phba->cfg_irq_chann = vectors; - if (phba->nvmet_support && (phba->cfg_nvmet_mrq > vectors)) - phba->cfg_nvmet_mrq = vectors; }
return rc;
cfg_fail_out: /* free the irq already requested */ - for (--index; index >= 0; index--) - free_irq(pci_irq_vector(phba->pcidev, index), - &phba->sli4_hba.hba_eq_hdl[index]); + for (--index; index >= 0; index--) { + eqhdl = lpfc_get_eq_hdl(index); + lpfc_irq_clear_aff(eqhdl); + irq_set_affinity_hint(eqhdl->irq, NULL); + free_irq(eqhdl->irq, eqhdl); + }
/* Unconfigure MSI-X capability structure */ pci_free_irq_vectors(phba->pcidev); @@@ -11057,6 -11527,8 +11525,8 @@@ static in lpfc_sli4_enable_msi(struct lpfc_hba *phba) { int rc, index; + unsigned int cpu; + struct lpfc_hba_eq_hdl *eqhdl;
rc = pci_alloc_irq_vectors(phba->pcidev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_AFFINITY); @@@ -11078,9 -11550,15 +11548,15 @@@ return rc; }
+ eqhdl = lpfc_get_eq_hdl(0); + eqhdl->irq = pci_irq_vector(phba->pcidev, 0); + + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, 0, LPFC_CPU_FIRST_IRQ, cpu); + for (index = 0; index < phba->cfg_irq_chann; index++) { - phba->sli4_hba.hba_eq_hdl[index].idx = index; - phba->sli4_hba.hba_eq_hdl[index].phba = phba; + eqhdl = lpfc_get_eq_hdl(index); + eqhdl->idx = index; }
return 0; @@@ -11138,15 -11616,21 +11614,21 @@@ lpfc_sli4_enable_intr(struct lpfc_hba * IRQF_SHARED, LPFC_DRIVER_NAME, phba); if (!retval) { struct lpfc_hba_eq_hdl *eqhdl; + unsigned int cpu;
/* Indicate initialization to INTx mode */ phba->intr_type = INTx; intr_mode = 0;
+ eqhdl = lpfc_get_eq_hdl(0); + eqhdl->irq = pci_irq_vector(phba->pcidev, 0); + + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, 0, LPFC_CPU_FIRST_IRQ, + cpu); for (idx = 0; idx < phba->cfg_irq_chann; idx++) { - eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl = lpfc_get_eq_hdl(idx); eqhdl->idx = idx; - eqhdl->phba = phba; } } } @@@ -11168,14 -11652,14 +11650,14 @@@ lpfc_sli4_disable_intr(struct lpfc_hba /* Disable the currently initialized interrupt mode */ if (phba->intr_type == MSIX) { int index; + struct lpfc_hba_eq_hdl *eqhdl;
/* Free up MSI-X multi-message vectors */ for (index = 0; index < phba->cfg_irq_chann; index++) { - irq_set_affinity_hint( - pci_irq_vector(phba->pcidev, index), - NULL); - free_irq(pci_irq_vector(phba->pcidev, index), - &phba->sli4_hba.hba_eq_hdl[index]); + eqhdl = lpfc_get_eq_hdl(index); + lpfc_irq_clear_aff(eqhdl); + irq_set_affinity_hint(eqhdl->irq, NULL); + free_irq(eqhdl->irq, eqhdl); } } else { free_irq(phba->pcidev->irq, phba); @@@ -11367,6 -11851,9 +11849,9 @@@ lpfc_sli4_hba_unset(struct lpfc_hba *ph /* Wait for completion of device XRI exchange busy */ lpfc_sli4_xri_exchange_busy_wait(phba);
+ /* per-phba callback de-registration for hotplug event */ + lpfc_cpuhp_remove(phba); + /* Disable PCI subsystem interrupt */ lpfc_sli4_disable_intr(phba);
@@@ -11538,6 -12025,7 +12023,7 @@@ lpfc_get_sli4_parameters(struct lpfc_hb sli4_params->cqav = bf_get(cfg_cqav, mbx_sli4_parameters); sli4_params->wqsize = bf_get(cfg_wqsize, mbx_sli4_parameters); sli4_params->bv1s = bf_get(cfg_bv1s, mbx_sli4_parameters); + sli4_params->pls = bf_get(cfg_pvl, mbx_sli4_parameters); sli4_params->sgl_pages_max = bf_get(cfg_sgl_page_cnt, mbx_sli4_parameters); sli4_params->wqpcnt = bf_get(cfg_wqpcnt, mbx_sli4_parameters); @@@ -11589,13 -12077,10 +12075,10 @@@ fcponly }
/* If the NVME FC4 type is enabled, scale the sg_seg_cnt to - * accommodate 512K and 1M IOs in a single nvme buf and supply - * enough NVME LS iocb buffers for larger connectivity counts. + * accommodate 512K and 1M IOs in a single nvme buf. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - phba->cfg_iocb_cnt = 5; - }
/* Only embed PBDE for if_type 6, PBDE support requires xib be set */ if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != @@@ -12312,35 -12797,57 +12795,57 @@@ lpfc_sli4_get_iocb_cnt(struct lpfc_hba }
- static void + static int lpfc_log_write_firmware_error(struct lpfc_hba *phba, uint32_t offset, uint32_t magic_number, uint32_t ftype, uint32_t fid, uint32_t fsize, const struct firmware *fw) { - if ((offset == ADD_STATUS_FW_NOT_SUPPORTED) || + int rc; + + /* Three cases: (1) FW was not supported on the detected adapter. + * (2) FW update has been locked out administratively. + * (3) Some other error during FW update. + * In each case, an unmaskable message is written to the console + * for admin diagnosis. + */ + if (offset == ADD_STATUS_FW_NOT_SUPPORTED || (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC && - magic_number != MAGIC_NUMER_G6) || + magic_number != MAGIC_NUMBER_G6) || (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC && - magic_number != MAGIC_NUMER_G7)) + magic_number != MAGIC_NUMBER_G7)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3030 This firmware version is not supported on " - "this HBA model. Device:%x Magic:%x Type:%x " - "ID:%x Size %d %zd\n", - phba->pcidev->device, magic_number, ftype, fid, - fsize, fw->size); - else + "3030 This firmware version is not supported on" + " this HBA model. Device:%x Magic:%x Type:%x " + "ID:%x Size %d %zd\n", + phba->pcidev->device, magic_number, ftype, fid, + fsize, fw->size); + rc = -EINVAL; + } else if (offset == ADD_STATUS_FW_DOWNLOAD_HW_DISABLED) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3022 FW Download failed. Device:%x Magic:%x Type:%x " - "ID:%x Size %d %zd\n", - phba->pcidev->device, magic_number, ftype, fid, - fsize, fw->size); + "3021 Firmware downloads have been prohibited " + "by a system configuration setting on " + "Device:%x Magic:%x Type:%x ID:%x Size %d " + "%zd\n", + phba->pcidev->device, magic_number, ftype, fid, + fsize, fw->size); + rc = -EACCES; + } else { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3022 FW Download failed. Add Status x%x " + "Device:%x Magic:%x Type:%x ID:%x Size %d " + "%zd\n", + offset, phba->pcidev->device, magic_number, + ftype, fid, fsize, fw->size); + rc = -EIO; + } + return rc; }
- /** * lpfc_write_firmware - attempt to write a firmware image to the port * @fw: pointer to firmware image returned from request_firmware. - * @phba: pointer to lpfc hba data structure. + * @context: pointer to firmware image returned from request_firmware. + * @ret: return value this routine provides to the caller. * **/ static void @@@ -12409,8 -12916,12 +12914,12 @@@ lpfc_write_firmware(const struct firmwa rc = lpfc_wr_object(phba, &dma_buffer_list, (fw->size - offset), &offset); if (rc) { - lpfc_log_write_firmware_error(phba, offset, - magic_number, ftype, fid, fsize, fw); + rc = lpfc_log_write_firmware_error(phba, offset, + magic_number, + ftype, + fid, + fsize, + fw); goto release_out; } } @@@ -12430,9 -12941,12 +12939,12 @@@ release_out } release_firmware(fw); out: - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3024 Firmware update done: %d.\n", rc); - return; + if (rc < 0) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3062 Firmware update error, status %d.\n", rc); + else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3024 Firmware update success: size %d.\n", rc); }
/** @@@ -12551,6 -13065,12 +13063,12 @@@ lpfc_pci_probe_one_s4(struct pci_dev *p phba->pport = NULL; lpfc_stop_port(phba);
+ /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); + + /* Init hba_eq_hdl array */ + lpfc_hba_eq_hdl_array_init(phba); + /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); if (intr_mode == LPFC_INTR_ERROR) { @@@ -12632,6 -13152,9 +13150,9 @@@ /* Enable RAS FW log support */ lpfc_sli4_ras_setup(phba);
+ INIT_LIST_HEAD(&phba->poll_list); + cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp); + return 0;
out_free_sysfs_attr: @@@ -13344,8 -13867,7 +13865,7 @@@ lpfc_sli4_oas_verify(struct lpfc_hba *p phba->cfg_fof = 1; } else { phba->cfg_fof = 0; - if (phba->device_data_mem_pool) - mempool_destroy(phba->device_data_mem_pool); + mempool_destroy(phba->device_data_mem_pool); phba->device_data_mem_pool = NULL; }
@@@ -13450,11 -13972,24 +13970,24 @@@ lpfc_init(void /* Initialize in case vector mapping is needed */ lpfc_present_cpu = num_present_cpus();
+ error = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "lpfc/sli4:online", + lpfc_cpu_online, lpfc_cpu_offline); + if (error < 0) + goto cpuhp_failure; + lpfc_cpuhp_state = error; + error = pci_register_driver(&lpfc_driver); - if (error) { - fc_release_transport(lpfc_transport_template); - fc_release_transport(lpfc_vport_transport_template); - } + if (error) + goto unwind; + + return error; + + unwind: + cpuhp_remove_multi_state(lpfc_cpuhp_state); + cpuhp_failure: + fc_release_transport(lpfc_transport_template); + fc_release_transport(lpfc_vport_transport_template);
return error; } @@@ -13471,6 -14006,7 +14004,7 @@@ lpfc_exit(void { misc_deregister(&lpfc_mgmt_dev); pci_unregister_driver(&lpfc_driver); + cpuhp_remove_multi_state(lpfc_cpuhp_state); fc_release_transport(lpfc_transport_template); fc_release_transport(lpfc_vport_transport_template); idr_destroy(&lpfc_hba_index); diff --combined drivers/scsi/lpfc/lpfc_nportdisc.c index fc6e4546d738,3bbe77c36a05..ae4359013846 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@@ -279,6 -279,55 +279,55 @@@ lpfc_els_abort(struct lpfc_hba *phba, s lpfc_cancel_retry_delay_tmo(phba->pport, ndlp); }
+ /* lpfc_defer_pt2pt_acc - Complete SLI3 pt2pt processing on link up + * @phba: pointer to lpfc hba data structure. + * @link_mbox: pointer to CONFIG_LINK mailbox object + * + * This routine is only called if we are SLI3, direct connect pt2pt + * mode and the remote NPort issues the PLOGI after link up. + */ + static void + lpfc_defer_pt2pt_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *link_mbox) + { + LPFC_MBOXQ_t *login_mbox; + MAILBOX_t *mb = &link_mbox->u.mb; + struct lpfc_iocbq *save_iocb; + struct lpfc_nodelist *ndlp; + int rc; + + ndlp = link_mbox->ctx_ndlp; + login_mbox = link_mbox->context3; + save_iocb = login_mbox->context3; + link_mbox->context3 = NULL; + login_mbox->context3 = NULL; + + /* Check for CONFIG_LINK error */ + if (mb->mbxStatus) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4575 CONFIG_LINK fails pt2pt discovery: %x\n", + mb->mbxStatus); + mempool_free(login_mbox, phba->mbox_mem_pool); + mempool_free(link_mbox, phba->mbox_mem_pool); + lpfc_sli_release_iocbq(phba, save_iocb); + return; + } + + /* Now that CONFIG_LINK completed, and our SID is configured, + * we can now proceed with sending the PLOGI ACC. + */ + rc = lpfc_els_rsp_acc(link_mbox->vport, ELS_CMD_PLOGI, + save_iocb, ndlp, login_mbox); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4576 PLOGI ACC fails pt2pt discovery: %x\n", + rc); + mempool_free(login_mbox, phba->mbox_mem_pool); + } + + mempool_free(link_mbox, phba->mbox_mem_pool); + lpfc_sli_release_iocbq(phba, save_iocb); + } + static int lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) @@@ -291,10 -340,12 +340,12 @@@ IOCB_t *icmd; struct serv_parm *sp; uint32_t ed_tov; - LPFC_MBOXQ_t *mbox; + LPFC_MBOXQ_t *link_mbox; + LPFC_MBOXQ_t *login_mbox; + struct lpfc_iocbq *save_iocb; struct ls_rjt stat; uint32_t vid, flag; - int rc; + int rc, defer_acc;
memset(&stat, 0, sizeof (struct ls_rjt)); pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; @@@ -343,6 -394,7 +394,7 @@@ else ndlp->nlp_fcp_info |= CLASS3;
+ defer_acc = 0; ndlp->nlp_class_sup = 0; if (sp->cls1.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS1; @@@ -354,7 -406,6 +406,6 @@@ ndlp->nlp_class_sup |= FC_COS_CLASS4; ndlp->nlp_maxframe = ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb; - /* if already logged in, do implicit logout */ switch (ndlp->nlp_state) { case NLP_STE_NPR_NODE: @@@ -396,6 -447,10 +447,10 @@@ ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; ndlp->nlp_flag &= ~NLP_FIRSTBURST;
+ login_mbox = NULL; + link_mbox = NULL; + save_iocb = NULL; + /* Check for Nport to NPort pt2pt protocol */ if ((vport->fc_flag & FC_PT2PT) && !(vport->fc_flag & FC_PT2PT_PLOGI)) { @@@ -423,17 -478,22 +478,22 @@@ if (phba->sli_rev == LPFC_SLI_REV4) lpfc_issue_reg_vfi(vport); else { - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (mbox == NULL) + defer_acc = 1; + link_mbox = mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!link_mbox) goto out; - lpfc_config_link(phba, mbox); - mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->vport = vport; - rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); - if (rc == MBX_NOT_FINISHED) { - mempool_free(mbox, phba->mbox_mem_pool); + lpfc_config_link(phba, link_mbox); + link_mbox->mbox_cmpl = lpfc_defer_pt2pt_acc; + link_mbox->vport = vport; + link_mbox->ctx_ndlp = ndlp; + + save_iocb = lpfc_sli_get_iocbq(phba); + if (!save_iocb) goto out; - } + /* Save info from cmd IOCB used in rsp */ + memcpy((uint8_t *)save_iocb, (uint8_t *)cmdiocb, + sizeof(struct lpfc_iocbq)); }
lpfc_can_disctmo(vport); @@@ -448,8 -508,8 +508,8 @@@ ndlp->nlp_flag |= NLP_SUPPRESS_RSP; }
- mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mbox) + login_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!login_mbox) goto out;
/* Registering an existing RPI behaves differently for SLI3 vs SLI4 */ @@@ -457,21 -517,19 +517,19 @@@ lpfc_unreg_rpi(vport, ndlp);
rc = lpfc_reg_rpi(phba, vport->vpi, icmd->un.rcvels.remoteID, - (uint8_t *) sp, mbox, ndlp->nlp_rpi); - if (rc) { - mempool_free(mbox, phba->mbox_mem_pool); + (uint8_t *)sp, login_mbox, ndlp->nlp_rpi); + if (rc) goto out; - }
/* ACC PLOGI rsp command needs to execute first, - * queue this mbox command to be processed later. + * queue this login_mbox command to be processed later. */ - mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; + login_mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; /* - * mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox + * login_mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox * command issued in lpfc_cmpl_els_acc(). */ - mbox->vport = vport; + login_mbox->vport = vport; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); spin_unlock_irq(shost->host_lock); @@@ -484,8 -542,10 +542,10 @@@ * single discovery thread, this will cause a huge delay in * discovery. Also this will cause multiple state machines * running in parallel for this node. + * This only applies to a fabric environment. */ - if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) { + if ((ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) && + (vport->fc_flag & FC_FABRIC)) { /* software abort outstanding PLOGI */ lpfc_els_abort(phba, ndlp); } @@@ -504,16 -564,47 +564,47 @@@ stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD; stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; rc = lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, - ndlp, mbox); + ndlp, login_mbox); if (rc) - mempool_free(mbox, phba->mbox_mem_pool); + mempool_free(login_mbox, phba->mbox_mem_pool); + return 1; + } + if (defer_acc) { + /* So the order here should be: + * Issue CONFIG_LINK mbox + * CONFIG_LINK cmpl + * Issue PLOGI ACC + * PLOGI ACC cmpl + * Issue REG_LOGIN mbox + */ + + /* Save the REG_LOGIN mbox for and rcv IOCB copy later */ + link_mbox->context3 = login_mbox; + login_mbox->context3 = save_iocb; + + /* Start the ball rolling by issuing CONFIG_LINK here */ + rc = lpfc_sli_issue_mbox(phba, link_mbox, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) + goto out; return 1; } - rc = lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox); + + rc = lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, login_mbox); if (rc) - mempool_free(mbox, phba->mbox_mem_pool); + mempool_free(login_mbox, phba->mbox_mem_pool); return 1; out: + if (defer_acc) + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4577 pt2pt discovery failure: %p %p %p\n", + save_iocb, link_mbox, login_mbox); + if (save_iocb) + lpfc_sli_release_iocbq(phba, save_iocb); + if (link_mbox) + mempool_free(link_mbox, phba->mbox_mem_pool); + if (login_mbox) + mempool_free(login_mbox, phba->mbox_mem_pool); + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); @@@ -851,9 -942,9 +942,9 @@@ lpfc_disc_set_adisc(struct lpfc_vport *
if (!(vport->fc_flag & FC_PT2PT)) { /* Check config parameter use-adisc or FCP-2 */ - if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || + if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) || ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) && - (ndlp->nlp_type & NLP_FCP_TARGET))) { + (ndlp->nlp_type & NLP_FCP_TARGET)))) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_ADISC; spin_unlock_irq(shost->host_lock); @@@ -2030,7 -2121,9 +2121,9 @@@ lpfc_cmpl_prli_prli_issue(struct lpfc_v if (bf_get_be32(prli_init, nvpr)) ndlp->nlp_type |= NLP_NVME_INITIATOR;
- if (phba->nsler && bf_get_be32(prli_nsler, nvpr)) + if (phba->nsler && bf_get_be32(prli_nsler, nvpr) && + bf_get_be32(prli_conf, nvpr)) + ndlp->nlp_nvme_info |= NLP_NVME_NSLER; else ndlp->nlp_nvme_info &= ~NLP_NVME_NSLER; diff --combined drivers/scsi/lpfc/lpfc_scsi.c index 6822cd9ff8f1,ba26df90a36a..b138d9fee675 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@@ -134,21 -134,21 +134,21 @@@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_
/** * lpfc_update_stats - Update statistical data for the command completion - * @phba: Pointer to HBA object. + * @vport: The virtual port on which this call is executing. * @lpfc_cmd: lpfc scsi command object pointer. * * This function is called when there is a command completion and this * function updates the statistical data for the command completion. **/ static void - lpfc_update_stats(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) + lpfc_update_stats(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd) { + struct lpfc_hba *phba = vport->phba; struct lpfc_rport_data *rdata; struct lpfc_nodelist *pnode; struct scsi_cmnd *cmd = lpfc_cmd->pCmd; unsigned long flags; - struct Scsi_Host *shost = cmd->device->host; - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); unsigned long latency; int i;
@@@ -526,8 -526,9 +526,8 @@@ lpfc_sli4_io_xri_aborted(struct lpfc_hb &qp->lpfc_abts_io_buf_list, list) { if (psb->cur_iocbq.sli4_xritag == xri) { list_del_init(&psb->list); - psb->exch_busy = 0; + psb->flags &= ~LPFC_SBUF_XBUSY; psb->status = IOSTAT_SUCCESS; -#ifdef BUILD_NVME if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) { qp->abts_nvme_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock); @@@ -535,6 -536,7 +535,6 @@@ lpfc_sli4_nvme_xri_aborted(phba, axri, psb); return; } -#endif qp->abts_scsi_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock);
@@@ -566,7 -568,7 +566,7 @@@ if (iocbq->sli4_xritag != xri) continue; psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq); - psb->exch_busy = 0; + psb->flags &= ~LPFC_SBUF_XBUSY; spin_unlock_irqrestore(&phba->hbalock, iflag); if (!list_empty(&pring->txq)) lpfc_worker_wake_up(phba); @@@ -786,7 -788,7 +786,7 @@@ lpfc_release_scsi_buf_s4(struct lpfc_hb psb->prot_seg_cnt = 0;
qp = psb->hdwq; - if (psb->exch_busy) { + if (psb->flags & LPFC_SBUF_XBUSY) { spin_lock_irqsave(&qp->abts_io_buf_list_lock, iflag); psb->pCmd = NULL; list_add_tail(&psb->list, &qp->lpfc_abts_io_buf_list); @@@ -3812,7 -3814,7 +3812,7 @@@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hb
/* Sanity check on return of outstanding command */ cmd = lpfc_cmd->pCmd; - if (!cmd) { + if (!cmd || !phba) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "2621 IO completion: Not an active IO\n"); spin_unlock(&lpfc_cmd->buf_lock); @@@ -3824,7 -3826,7 +3824,7 @@@ phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) { + if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) { cpu = raw_smp_processor_id(); if (cpu < LPFC_CHECK_CPU_CNT && phba->sli4_hba.hdwq) phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++; @@@ -3835,7 -3837,10 +3835,10 @@@ lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK); lpfc_cmd->status = pIocbOut->iocb.ulpStatus; /* pick up SLI4 exhange busy status from HBA */ - lpfc_cmd->exch_busy = pIocbOut->iocb_flag & LPFC_EXCHANGE_BUSY; + if (pIocbOut->iocb_flag & LPFC_EXCHANGE_BUSY) + lpfc_cmd->flags |= LPFC_SBUF_XBUSY; + else + lpfc_cmd->flags &= ~LPFC_SBUF_XBUSY;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (lpfc_cmd->prot_data_type) { @@@ -3869,7 -3874,7 +3872,7 @@@ } #endif
- if (lpfc_cmd->status) { + if (unlikely(lpfc_cmd->status)) { if (lpfc_cmd->status == IOSTAT_LOCAL_REJECT && (lpfc_cmd->result & IOERR_DRVR_MASK)) lpfc_cmd->status = IOSTAT_DRIVER_REJECT; @@@ -4002,7 -4007,7 +4005,7 @@@ scsi_get_resid(cmd)); }
- lpfc_update_stats(phba, lpfc_cmd); + lpfc_update_stats(vport, lpfc_cmd); if (vport->cfg_max_scsicmpl_time && time_after(jiffies, lpfc_cmd->start_time + msecs_to_jiffies(vport->cfg_max_scsicmpl_time))) { @@@ -4610,17 -4615,18 +4613,18 @@@ lpfc_queuecommand(struct Scsi_Host *sho err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd); }
- if (err == 2) { - cmnd->result = DID_ERROR << 16; - goto out_fail_command_release_buf; - } else if (err) { + if (unlikely(err)) { + if (err == 2) { + cmnd->result = DID_ERROR << 16; + goto out_fail_command_release_buf; + } goto out_host_busy_free_buf; }
lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) { + if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) { cpu = raw_smp_processor_id(); if (cpu < LPFC_CHECK_CPU_CNT) { struct lpfc_sli4_hdw_queue *hdwq = @@@ -4843,20 -4849,21 +4847,21 @@@ lpfc_abort_handler(struct scsi_cmnd *cm ret_val = __lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0); } - /* no longer need the lock after this point */ - spin_unlock_irqrestore(&phba->hbalock, flags);
if (ret_val == IOCB_ERROR) { /* Indicate the IO is not being aborted by the driver. */ iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED; lpfc_cmd->waitq = NULL; spin_unlock(&lpfc_cmd->buf_lock); + spin_unlock_irqrestore(&phba->hbalock, flags); lpfc_sli_release_iocbq(phba, abtsiocb); ret = FAILED; goto out; }
+ /* no longer need the lock after this point */ spin_unlock(&lpfc_cmd->buf_lock); + spin_unlock_irqrestore(&phba->hbalock, flags);
if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_sli_handle_fast_ring_event(phba, diff --combined drivers/scsi/lpfc/lpfc_sli.c index 614f78dddafe,7c8527bd1677..c82b5792da98 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@@ -87,6 -87,10 +87,10 @@@ static void lpfc_sli4_hba_handle_eqe(st struct lpfc_eqe *eqe); static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba); static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba); + static struct lpfc_cqe *lpfc_sli4_cq_get(struct lpfc_queue *q); + static void __lpfc_sli4_consume_cqe(struct lpfc_hba *phba, + struct lpfc_queue *cq, + struct lpfc_cqe *cqe);
static IOCB_t * lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq) @@@ -467,25 -471,52 +471,52 @@@ __lpfc_sli4_consume_eqe(struct lpfc_hb }
static void - lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq) + lpfc_sli4_eqcq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq) { - struct lpfc_eqe *eqe; - uint32_t count = 0; + struct lpfc_eqe *eqe = NULL; + u32 eq_count = 0, cq_count = 0; + struct lpfc_cqe *cqe = NULL; + struct lpfc_queue *cq = NULL, *childq = NULL; + int cqid = 0;
/* walk all the EQ entries and drop on the floor */ eqe = lpfc_sli4_eq_get(eq); while (eqe) { + /* Get the reference to the corresponding CQ */ + cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); + cq = NULL; + + list_for_each_entry(childq, &eq->child_list, list) { + if (childq->queue_id == cqid) { + cq = childq; + break; + } + } + /* If CQ is valid, iterate through it and drop all the CQEs */ + if (cq) { + cqe = lpfc_sli4_cq_get(cq); + while (cqe) { + __lpfc_sli4_consume_cqe(phba, cq, cqe); + cq_count++; + cqe = lpfc_sli4_cq_get(cq); + } + /* Clear and re-arm the CQ */ + phba->sli4_hba.sli4_write_cq_db(phba, cq, cq_count, + LPFC_QUEUE_REARM); + cq_count = 0; + } __lpfc_sli4_consume_eqe(phba, eq, eqe); - count++; + eq_count++; eqe = lpfc_sli4_eq_get(eq); }
/* Clear and re-arm the EQ */ - phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM); + phba->sli4_hba.sli4_write_eq_db(phba, eq, eq_count, LPFC_QUEUE_REARM); }
static int - lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq) + lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq, + uint8_t rearm) { struct lpfc_eqe *eqe; int count = 0, consumed = 0; @@@ -519,8 -550,8 +550,8 @@@ eq->queue_claimed = 0;
rearm_and_exit: - /* Always clear and re-arm the EQ */ - phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM); + /* Always clear the EQ. */ + phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, rearm);
return count; } @@@ -2526,6 -2557,8 +2557,8 @@@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba } else { __lpfc_sli_rpi_release(vport, ndlp); } + if (vport->load_flag & FC_UNLOADING) + lpfc_nlp_put(ndlp); pmb->ctx_ndlp = NULL; } } @@@ -2672,7 -2705,8 +2705,8 @@@ lpfc_sli_handle_mb_event(struct lpfc_hb lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "(%d):0323 Unknown Mailbox command " "x%x (x%x/x%x) Cmpl\n", - pmb->vport ? pmb->vport->vpi : 0, + pmb->vport ? pmb->vport->vpi : + LPFC_VPORT_UNKNOWN, pmbox->mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, pmb), @@@ -2693,7 -2727,8 +2727,8 @@@ "(%d):0305 Mbox cmd cmpl " "error - RETRYing Data: x%x " "(x%x/x%x) x%x x%x x%x\n", - pmb->vport ? pmb->vport->vpi : 0, + pmb->vport ? pmb->vport->vpi : + LPFC_VPORT_UNKNOWN, pmbox->mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, pmb), @@@ -2701,7 -2736,8 +2736,8 @@@ pmb), pmbox->mbxStatus, pmbox->un.varWords[0], - pmb->vport->port_state); + pmb->vport ? pmb->vport->port_state : + LPFC_VPORT_UNKNOWN); pmbox->mbxStatus = 0; pmbox->mbxOwner = OWN_HOST; rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); @@@ -6167,6 -6203,14 +6203,14 @@@ lpfc_set_features(struct lpfc_hba *phba mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS; mbox->u.mqe.un.set_feature.param_len = 8; break; + case LPFC_SET_DUAL_DUMP: + bf_set(lpfc_mbx_set_feature_dd, + &mbox->u.mqe.un.set_feature, LPFC_ENABLE_DUAL_DUMP); + bf_set(lpfc_mbx_set_feature_ddquery, + &mbox->u.mqe.un.set_feature, 0); + mbox->u.mqe.un.set_feature.feature = LPFC_SET_DUAL_DUMP; + mbox->u.mqe.un.set_feature.param_len = 4; + break; }
return; @@@ -6184,11 -6228,16 +6228,16 @@@ lpfc_ras_stop_fwlog(struct lpfc_hba *ph { struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
- ras_fwlog->ras_active = false; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = INACTIVE; + spin_unlock_irq(&phba->hbalock);
/* Disable FW logging to host memory */ writel(LPFC_CTL_PDEV_CTL_DDL_RAS, phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); + + /* Wait 10ms for firmware to stop using DMA buffer */ + usleep_range(10 * 1000, 20 * 1000); }
/** @@@ -6224,7 -6273,9 +6273,9 @@@ lpfc_sli4_ras_dma_free(struct lpfc_hba ras_fwlog->lwpd.virt = NULL; }
- ras_fwlog->ras_active = false; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = INACTIVE; + spin_unlock_irq(&phba->hbalock); }
/** @@@ -6326,7 -6377,9 +6377,9 @@@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hb goto disable_ras; }
- ras_fwlog->ras_active = true; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = ACTIVE; + spin_unlock_irq(&phba->hbalock); mempool_free(pmb, phba->mbox_mem_pool);
return; @@@ -6358,6 -6411,10 +6411,10 @@@ lpfc_sli4_ras_fwlog_init(struct lpfc_hb uint32_t len = 0, fwlog_buffsize, fwlog_entry_count; int rc = 0;
+ spin_lock_irq(&phba->hbalock); + ras_fwlog->state = INACTIVE; + spin_unlock_irq(&phba->hbalock); + fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE * phba->cfg_ras_fwlog_buffsize); fwlog_entry_count = (fwlog_buffsize/LPFC_RAS_MAX_ENTRY_SIZE); @@@ -6417,6 -6474,9 +6474,9 @@@ mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys); mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys);
+ spin_lock_irq(&phba->hbalock); + ras_fwlog->state = REG_INPROGRESS; + spin_unlock_irq(&phba->hbalock); mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl;
@@@ -7148,7 -7208,7 +7208,7 @@@ lpfc_post_rq_buffer(struct lpfc_hba *ph int lpfc_sli4_hba_setup(struct lpfc_hba *phba) { - int rc, i, cnt, len; + int rc, i, cnt, len, dd; LPFC_MBOXQ_t *mboxq; struct lpfc_mqe *mqe; uint8_t *vpd; @@@ -7399,6 -7459,23 +7459,23 @@@ phba->sli3_options |= (LPFC_SLI3_NPIV_ENABLED | LPFC_SLI3_HBQ_ENABLED); spin_unlock_irq(&phba->hbalock);
+ /* Always try to enable dual dump feature if we can */ + lpfc_set_features(phba, mboxq, LPFC_SET_DUAL_DUMP); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + dd = bf_get(lpfc_mbx_set_feature_dd, &mboxq->u.mqe.un.set_feature); + if ((rc == MBX_SUCCESS) && (dd == LPFC_ENABLE_DUAL_DUMP)) + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_INIT, + "6448 Dual Dump is enabled\n"); + else + lpfc_printf_log(phba, KERN_INFO, LOG_SLI | LOG_INIT, + "6447 Dual Dump Mailbox x%x (x%x/x%x) failed, " + "rc:x%x dd:x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + lpfc_sli_config_mbox_subsys_get( + phba, mboxq), + lpfc_sli_config_mbox_opcode_get( + phba, mboxq), + rc, dd); /* * Allocate all resources (xri,rpi,vpi,vfi) now. Subsequent * calls depends on these resources to complete port setup. @@@ -7523,9 -7600,11 +7600,11 @@@ } phba->sli4_hba.nvmet_xri_cnt = rc;
- cnt = phba->cfg_iocb_cnt * 1024; - /* We need 1 iocbq for every SGL, for IO processing */ - cnt += phba->sli4_hba.nvmet_xri_cnt; + /* We allocate an iocbq for every receive context SGL. + * The additional allocation is for abort and ls handling. + */ + cnt = phba->sli4_hba.nvmet_xri_cnt + + phba->sli4_hba.max_cfg_param.max_xri; } else { /* update host common xri-sgl sizes and mappings */ rc = lpfc_sli4_io_sgl_update(phba); @@@ -7547,14 -7626,17 +7626,17 @@@ rc = -ENODEV; goto out_destroy_queue; } - cnt = phba->cfg_iocb_cnt * 1024; + /* Each lpfc_io_buf job structure has an iocbq element. + * This cnt provides for abort, els, ct and ls requests. + */ + cnt = phba->sli4_hba.max_cfg_param.max_xri; }
if (!phba->sli.iocbq_lookup) { /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d total %d\n", - phba->cfg_iocb_cnt, cnt); + "2821 initialize iocb list with %d entries\n", + cnt); rc = lpfc_init_iocb_list(phba, cnt); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@@ -7866,7 -7948,7 +7948,7 @@@ lpfc_sli4_process_missed_mbox_completio if (sli4_hba->hdwq) { for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) { eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq; - if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { + if (eq && eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { fpeq = eq; break; } @@@ -7892,7 -7974,7 +7974,7 @@@
if (mbox_pending) /* process and rearm the EQ */ - lpfc_sli4_process_eq(phba, fpeq); + lpfc_sli4_process_eq(phba, fpeq, LPFC_QUEUE_REARM); else /* Always clear and re-arm the EQ */ sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM); @@@ -8964,7 -9046,8 +9046,8 @@@ lpfc_mbox_api_table_setup(struct lpfc_h * @pring: Pointer to driver SLI ring object. * @piocb: Pointer to address of newly added command iocb. * - * This function is called with hbalock held to add a command + * This function is called with hbalock held for SLI3 ports or + * the ring lock held for SLI4 ports to add a command * iocb to the txq when SLI layer cannot submit the command iocb * to the ring. **/ @@@ -8972,7 -9055,10 +9055,10 @@@ voi __lpfc_sli_ringtx_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb) { - lockdep_assert_held(&phba->hbalock); + if (phba->sli_rev == LPFC_SLI_REV4) + lockdep_assert_held(&pring->ring_lock); + else + lockdep_assert_held(&phba->hbalock); /* Insert the caller's iocb in the txq tail for later processing. */ list_add_tail(&piocb->list, &pring->txq); } @@@ -9863,7 -9949,7 +9949,7 @@@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phb * __lpfc_sli_issue_iocb_s4 is used by other functions in the driver to issue * an iocb command to an HBA with SLI-4 interface spec. * - * This function is called with hbalock held. The function will return success + * This function is called with ringlock held. The function will return success * after it successfully submit the iocb to firmware or after adding to the * txq. **/ @@@ -10053,10 -10139,13 +10139,13 @@@ lpfc_sli_issue_iocb(struct lpfc_hba *ph struct lpfc_iocbq *piocb, uint32_t flag) { struct lpfc_sli_ring *pring; + struct lpfc_queue *eq; unsigned long iflags; int rc;
if (phba->sli_rev == LPFC_SLI_REV4) { + eq = phba->sli4_hba.hdwq[piocb->hba_wqidx].hba_eq; + pring = lpfc_sli4_calc_ring(phba, piocb); if (unlikely(pring == NULL)) return IOCB_ERROR; @@@ -10064,6 -10153,8 +10153,8 @@@ spin_lock_irqsave(&pring->ring_lock, iflags); rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(eq, LPFC_POLL_FASTPATH); } else { /* For now, SLI2/3 will still use hbalock */ spin_lock_irqsave(&phba->hbalock, iflags); @@@ -10678,14 -10769,14 +10769,14 @@@ lpfc_sli_host_down(struct lpfc_vport *v set_bit(LPFC_DATA_READY, &phba->data_flags); } prev_pring_flag = pring->flag; - spin_lock_irq(&pring->ring_lock); + spin_lock(&pring->ring_lock); list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { if (iocb->vport != vport) continue; list_move_tail(&iocb->list, &completions); } - spin_unlock_irq(&pring->ring_lock); + spin_unlock(&pring->ring_lock); list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) { if (iocb->vport != vport) @@@ -11050,9 -11141,6 +11141,6 @@@ lpfc_sli_abort_els_cmpl(struct lpfc_hb irsp->ulpStatus, irsp->un.ulpWord[4]);
spin_unlock_irq(&phba->hbalock); - if (irsp->ulpStatus == IOSTAT_LOCAL_REJECT && - irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) - lpfc_sli_release_iocbq(phba, abort_iocb); } release_iocb: lpfc_sli_release_iocbq(phba, cmdiocb); @@@ -11736,7 -11824,10 +11824,10 @@@ lpfc_sli_wake_iocb_wait(struct lpfc_hb !(cmdiocbq->iocb_flag & LPFC_IO_LIBDFC)) { lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf, cur_iocbq); - lpfc_cmd->exch_busy = rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY; + if (rspiocbq && (rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY)) + lpfc_cmd->flags |= LPFC_SBUF_XBUSY; + else + lpfc_cmd->flags &= ~LPFC_SBUF_XBUSY; }
pdone_q = cmdiocbq->context_un.wait_queue; @@@ -13158,13 -13249,19 +13249,19 @@@ send_current_mbox phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; /* Setting active mailbox pointer need to be in sync to flag clear */ phba->sli.mbox_active = NULL; + if (bf_get(lpfc_trailer_consumed, mcqe)) + lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); spin_unlock_irqrestore(&phba->hbalock, iflags); /* Wake up worker thread to post the next pending mailbox command */ lpfc_worker_wake_up(phba); + return workposted; + out_no_mqe_complete: + spin_lock_irqsave(&phba->hbalock, iflags); if (bf_get(lpfc_trailer_consumed, mcqe)) lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); - return workposted; + spin_unlock_irqrestore(&phba->hbalock, iflags); + return false; }
/** @@@ -13217,7 -13314,6 +13314,6 @@@ lpfc_sli4_sp_handle_els_wcqe(struct lpf struct lpfc_sli_ring *pring = cq->pring; int txq_cnt = 0; int txcmplq_cnt = 0; - int fcp_txcmplq_cnt = 0;
/* Check for response status */ if (unlikely(bf_get(lpfc_wcqe_c_status, wcqe))) { @@@ -13239,9 -13335,8 +13335,8 @@@ txcmplq_cnt++; lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0387 NO IOCBQ data: txq_cnt=%d iocb_cnt=%d " - "fcp_txcmplq_cnt=%d, els_txcmplq_cnt=%d\n", + "els_txcmplq_cnt=%d\n", txq_cnt, phba->iocb_cnt, - fcp_txcmplq_cnt, txcmplq_cnt); return false; } @@@ -13592,6 -13687,7 +13687,7 @@@ __lpfc_sli4_process_cq(struct lpfc_hba phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed, LPFC_QUEUE_NOARM); consumed = 0; + cq->assoc_qp->q_flag |= HBA_EQ_DELAY_CHK; }
if (count == LPFC_NVMET_CQ_NOTIFY) @@@ -14220,7 -14316,7 +14316,7 @@@ lpfc_sli4_hba_intr_handler(int irq, voi spin_lock_irqsave(&phba->hbalock, iflag); if (phba->link_state < LPFC_LINK_DOWN) /* Flush, clear interrupt, and rearm the EQ */ - lpfc_sli4_eq_flush(phba, fpeq); + lpfc_sli4_eqcq_flush(phba, fpeq); spin_unlock_irqrestore(&phba->hbalock, iflag); return IRQ_NONE; } @@@ -14230,14 -14326,14 +14326,14 @@@ fpeq->last_cpu = raw_smp_processor_id();
if (icnt > LPFC_EQD_ISR_TRIGGER && - phba->cfg_irq_chann == 1 && + fpeq->q_flag & HBA_EQ_DELAY_CHK && phba->cfg_auto_imax && fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY && phba->sli.sli_flag & LPFC_SLI_USE_EQDR) lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY);
/* process and rearm the EQ */ - ecount = lpfc_sli4_process_eq(phba, fpeq); + ecount = lpfc_sli4_process_eq(phba, fpeq, LPFC_QUEUE_REARM);
if (unlikely(ecount == 0)) { fpeq->EQ_no_entry++; @@@ -14297,6 -14393,147 +14393,147 @@@ lpfc_sli4_intr_handler(int irq, void *d return (hba_handled == true) ? IRQ_HANDLED : IRQ_NONE; } /* lpfc_sli4_intr_handler */
+ void lpfc_sli4_poll_hbtimer(struct timer_list *t) + { + struct lpfc_hba *phba = from_timer(phba, t, cpuhp_poll_timer); + struct lpfc_queue *eq; + int i = 0; + + rcu_read_lock(); + + list_for_each_entry_rcu(eq, &phba->poll_list, _poll_list) + i += lpfc_sli4_poll_eq(eq, LPFC_POLL_SLOWPATH); + if (!list_empty(&phba->poll_list)) + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + + rcu_read_unlock(); + } + + inline int lpfc_sli4_poll_eq(struct lpfc_queue *eq, uint8_t path) + { + struct lpfc_hba *phba = eq->phba; + int i = 0; + + /* + * Unlocking an irq is one of the entry point to check + * for re-schedule, but we are good for io submission + * path as midlayer does a get_cpu to glue us in. Flush + * out the invalidate queue so we can see the updated + * value for flag. + */ + smp_rmb(); + + if (READ_ONCE(eq->mode) == LPFC_EQ_POLL) + /* We will not likely get the completion for the caller + * during this iteration but i guess that's fine. + * Future io's coming on this eq should be able to + * pick it up. As for the case of single io's, they + * will be handled through a sched from polling timer + * function which is currently triggered every 1msec. + */ + i = lpfc_sli4_process_eq(phba, eq, LPFC_QUEUE_NOARM); + + return i; + } + + static inline void lpfc_sli4_add_to_poll_list(struct lpfc_queue *eq) + { + struct lpfc_hba *phba = eq->phba; + + if (list_empty(&phba->poll_list)) { + timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); + /* kickstart slowpath processing for this eq */ + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + } + + list_add_rcu(&eq->_poll_list, &phba->poll_list); + synchronize_rcu(); + } + + static inline void lpfc_sli4_remove_from_poll_list(struct lpfc_queue *eq) + { + struct lpfc_hba *phba = eq->phba; + + /* Disable slowpath processing for this eq. Kick start the eq + * by RE-ARMING the eq's ASAP + */ + list_del_rcu(&eq->_poll_list); + synchronize_rcu(); + + if (list_empty(&phba->poll_list)) + del_timer_sync(&phba->cpuhp_poll_timer); + } + + void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba) + { + struct lpfc_queue *eq, *next; + + list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list) + list_del(&eq->_poll_list); + + INIT_LIST_HEAD(&phba->poll_list); + synchronize_rcu(); + } + + static inline void + __lpfc_sli4_switch_eqmode(struct lpfc_queue *eq, uint8_t mode) + { + if (mode == eq->mode) + return; + /* + * currently this function is only called during a hotplug + * event and the cpu on which this function is executing + * is going offline. By now the hotplug has instructed + * the scheduler to remove this cpu from cpu active mask. + * So we don't need to work about being put aside by the + * scheduler for a high priority process. Yes, the inte- + * rrupts could come but they are known to retire ASAP. + */ + + /* Disable polling in the fastpath */ + WRITE_ONCE(eq->mode, mode); + /* flush out the store buffer */ + smp_wmb(); + + /* + * Add this eq to the polling list and start polling. For + * a grace period both interrupt handler and poller will + * try to process the eq _but_ that's fine. We have a + * synchronization mechanism in place (queue_claimed) to + * deal with it. This is just a draining phase for int- + * errupt handler (not eq's) as we have guranteed through + * barrier that all the CPUs have seen the new CQ_POLLED + * state. which will effectively disable the REARMING of + * the EQ. The whole idea is eq's die off eventually as + * we are not rearming EQ's anymore. + */ + mode ? lpfc_sli4_add_to_poll_list(eq) : + lpfc_sli4_remove_from_poll_list(eq); + } + + void lpfc_sli4_start_polling(struct lpfc_queue *eq) + { + __lpfc_sli4_switch_eqmode(eq, LPFC_EQ_POLL); + } + + void lpfc_sli4_stop_polling(struct lpfc_queue *eq) + { + struct lpfc_hba *phba = eq->phba; + + __lpfc_sli4_switch_eqmode(eq, LPFC_EQ_INTERRUPT); + + /* Kick start for the pending io's in h/w. + * Once we switch back to interrupt processing on a eq + * the io path completion will only arm eq's when it + * receives a completion. But since eq's are in disa- + * rmed state it doesn't receive a completion. This + * creates a deadlock scenaro. + */ + phba->sli4_hba.sli4_write_eq_db(phba, eq, 0, LPFC_QUEUE_REARM); + } + /** * lpfc_sli4_queue_free - free a queue structure and associated memory * @queue: The queue structure to free. @@@ -14371,6 -14608,7 +14608,7 @@@ lpfc_sli4_queue_alloc(struct lpfc_hba * return NULL;
INIT_LIST_HEAD(&queue->list); + INIT_LIST_HEAD(&queue->_poll_list); INIT_LIST_HEAD(&queue->wq_list); INIT_LIST_HEAD(&queue->wqfull_list); INIT_LIST_HEAD(&queue->page_list); @@@ -18124,8 -18362,9 +18362,9 @@@ lpfc_sli4_alloc_rpi(struct lpfc_hba *ph phba->sli4_hba.max_cfg_param.rpi_used++; phba->sli4_hba.rpi_count++; } - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "0001 rpi:%x max:%x lim:%x\n", + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0001 Allocated rpi:x%x max:x%x lim:x%x\n", (int) rpi, max_rpi, rpi_limit);
/* @@@ -18181,11 -18420,19 +18420,19 @@@ static void __lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi) { + /* + * if the rpi value indicates a prior unreg has already + * been done, skip the unreg. + */ + if (rpi == LPFC_RPI_ALLOC_ERROR) + return; + if (test_and_clear_bit(rpi, phba->sli4_hba.rpi_bmask)) { phba->sli4_hba.rpi_count--; phba->sli4_hba.max_cfg_param.rpi_used--; } else { - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "2016 rpi %x not inuse\n", rpi); } @@@ -19683,6 -19930,8 +19930,8 @@@ lpfc_sli4_issue_wqe(struct lpfc_hba *ph
lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH); return 0; }
@@@ -19703,6 -19952,8 +19952,8 @@@ } lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH); return 0; }
@@@ -19731,6 -19982,8 +19982,8 @@@ } lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH); return 0; } return WQE_ERROR; @@@ -20093,6 -20346,13 +20346,13 @@@ void lpfc_release_io_buf(struct lpfc_hb lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL; lpfc_ncmd->cur_iocbq.iocb_cmpl = NULL;
+ if (phba->cfg_xpsgl && !phba->nvmet_support && + !list_empty(&lpfc_ncmd->dma_sgl_xtra_list)) + lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd); + + if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list)) + lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd); + if (phba->cfg_xri_rebalancing) { if (lpfc_ncmd->expedite) { /* Return to expedite pool */ @@@ -20157,13 -20417,6 +20417,6 @@@ spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag); } - - if (phba->cfg_xpsgl && !phba->nvmet_support && - !list_empty(&lpfc_ncmd->dma_sgl_xtra_list)) - lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd); - - if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list)) - lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd); }
/** @@@ -20399,8 -20652,9 +20652,9 @@@ lpfc_get_sgl_per_hdwq(struct lpfc_hba * struct sli4_hybrid_sgl *allocated_sgl = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->sgl_list; + unsigned long iflags;
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
if (likely(!list_empty(buf_list))) { /* break off 1 chunk from the sgl_list */ @@@ -20412,9 -20666,9 +20666,9 @@@ } } else { /* allocate more */ - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC, - cpu_to_node(smp_processor_id())); + cpu_to_node(hdwq->io_wq->chann)); if (!tmp) { lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "8353 error kmalloc memory for HDWQ " @@@ -20434,7 -20688,7 +20688,7 @@@ return NULL; }
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); list_add_tail(&tmp->list_node, &lpfc_buf->dma_sgl_xtra_list); }
@@@ -20442,7 -20696,7 +20696,7 @@@ struct sli4_hybrid_sgl, list_node);
- spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
return allocated_sgl; } @@@ -20466,8 -20720,9 +20720,9 @@@ lpfc_put_sgl_per_hdwq(struct lpfc_hba * struct sli4_hybrid_sgl *tmp = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->sgl_list; + unsigned long iflags;
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
if (likely(!list_empty(&lpfc_buf->dma_sgl_xtra_list))) { list_for_each_entry_safe(list_entry, tmp, @@@ -20480,7 -20735,7 +20735,7 @@@ rc = -EINVAL; }
- spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); return rc; }
@@@ -20501,8 -20756,9 +20756,9 @@@ lpfc_free_sgl_per_hdwq(struct lpfc_hba struct list_head *buf_list = &hdwq->sgl_list; struct sli4_hybrid_sgl *list_entry = NULL; struct sli4_hybrid_sgl *tmp = NULL; + unsigned long iflags;
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
/* Free sgl pool */ list_for_each_entry_safe(list_entry, tmp, @@@ -20514,7 -20770,7 +20770,7 @@@ kfree(list_entry); }
- spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); }
/** @@@ -20538,8 -20794,9 +20794,9 @@@ lpfc_get_cmd_rsp_buf_per_hdwq(struct lp struct fcp_cmd_rsp_buf *allocated_buf = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->cmd_rsp_buf_list; + unsigned long iflags;
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
if (likely(!list_empty(buf_list))) { /* break off 1 chunk from the list */ @@@ -20552,9 -20809,9 +20809,9 @@@ } } else { /* allocate more */ - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC, - cpu_to_node(smp_processor_id())); + cpu_to_node(hdwq->io_wq->chann)); if (!tmp) { lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "8355 error kmalloc memory for HDWQ " @@@ -20579,7 -20836,7 +20836,7 @@@ tmp->fcp_rsp = (struct fcp_rsp *)((uint8_t *)tmp->fcp_cmnd + sizeof(struct fcp_cmnd));
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); list_add_tail(&tmp->list_node, &lpfc_buf->dma_cmd_rsp_list); }
@@@ -20587,7 -20844,7 +20844,7 @@@ struct fcp_cmd_rsp_buf, list_node);
- spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
return allocated_buf; } @@@ -20612,8 -20869,9 +20869,9 @@@ lpfc_put_cmd_rsp_buf_per_hdwq(struct lp struct fcp_cmd_rsp_buf *tmp = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->cmd_rsp_buf_list; + unsigned long iflags;
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
if (likely(!list_empty(&lpfc_buf->dma_cmd_rsp_list))) { list_for_each_entry_safe(list_entry, tmp, @@@ -20626,7 -20884,7 +20884,7 @@@ rc = -EINVAL; }
- spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); return rc; }
@@@ -20647,8 -20905,9 +20905,9 @@@ lpfc_free_cmd_rsp_buf_per_hdwq(struct l struct list_head *buf_list = &hdwq->cmd_rsp_buf_list; struct fcp_cmd_rsp_buf *list_entry = NULL; struct fcp_cmd_rsp_buf *tmp = NULL; + unsigned long iflags;
- spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
/* Free cmd_rsp buf pool */ list_for_each_entry_safe(list_entry, tmp, @@@ -20661,5 -20920,5 +20920,5 @@@ kfree(list_entry); }
- spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); } diff --combined drivers/scsi/qla2xxx/qla_attr.c index 7259bce85e0e,481c05dbea06..ae97e2f310a3 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@@ -102,8 -102,10 +102,10 @@@ qla2x00_sysfs_write_fw_dump(struct fil qla8044_idc_lock(ha); qla82xx_set_reset_owner(vha); qla8044_idc_unlock(ha); - } else + } else { + ha->fw_dump_mpi = 1; qla2x00_system_error(vha); + } break; case 4: if (IS_P3P_TYPE(ha)) { @@@ -440,6 -442,9 +442,6 @@@ qla2x00_sysfs_write_optrom_ctl(struct f valid = 0; if (ha->optrom_size == OPTROM_SIZE_2300 && start == 0) valid = 1; - else if (start == (ha->flt_region_boot * 4) || - start == (ha->flt_region_fw * 4)) - valid = 1; else if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) valid = 1; if (!valid) { @@@ -486,10 -491,8 +488,10 @@@ "Writing flash region -- 0x%x/0x%x.\n", ha->optrom_region_start, ha->optrom_region_size);
- ha->isp_ops->write_optrom(vha, ha->optrom_buffer, + rval = ha->isp_ops->write_optrom(vha, ha->optrom_buffer, ha->optrom_region_start, ha->optrom_region_size); + if (rval) + rval = -EIO; break; default: rval = -EINVAL; diff --combined drivers/scsi/qla2xxx/qla_mbx.c index 4a1f21c11758,4eb88c3ee08e..0cf94f05f008 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@@ -702,7 -702,6 +702,7 @@@ qla2x00_execute_fw(scsi_qla_host_t *vha mcp->mb[2] = LSW(risc_addr); mcp->mb[3] = 0; mcp->mb[4] = 0; + mcp->mb[11] = 0; ha->flags.using_lr_setting = 0; if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { @@@ -747,7 -746,7 +747,7 @@@ if (ha->flags.exchoffld_enabled) mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD;
- mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1; + mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11; mcp->in_mb |= MBX_3 | MBX_2 | MBX_1; } else { mcp->mb[1] = LSW(risc_addr); @@@ -1932,7 -1931,7 +1932,7 @@@ qla2x00_get_port_database(scsi_qla_host pd24 = (struct port_database_24xx *) pd;
/* Check for logged in state. */ - if (fcport->fc4f_nvme) { + if (NVME_TARGET(ha, fcport)) { current_login_state = pd24->current_login_state >> 4; last_login_state = pd24->last_login_state >> 4; } else { @@@ -3899,8 -3898,9 +3899,9 @@@ qla24xx_report_id_acquisition(scsi_qla_ fcport->scan_state = QLA_FCPORT_FOUND; fcport->n2n_flag = 1; fcport->keep_nport_handle = 1; + fcport->fc4_type = FS_FC4TYPE_FCP; if (vha->flags.nvme_enabled) - fcport->fc4f_nvme = 1; + fcport->fc4_type |= FS_FC4TYPE_NVME;
switch (fcport->disc_state) { case DSC_DELETED: @@@ -6287,17 -6287,13 +6288,13 @@@ int qla24xx_send_mb_cmd(struct scsi_qla case QLA_SUCCESS: ql_dbg(ql_dbg_mbx, vha, 0x119d, "%s: %s done.\n", __func__, sp->name); - sp->free(sp); break; default: ql_dbg(ql_dbg_mbx, vha, 0x119e, "%s: %s Failed. %x.\n", __func__, sp->name, rval); - sp->free(sp); break; }
- return rval; - done_free_sp: sp->free(sp); done: @@@ -6362,7 -6358,7 +6359,7 @@@ int __qla24xx_parse_gpdb(struct scsi_ql uint64_t zero = 0; u8 current_login_state, last_login_state;
- if (fcport->fc4f_nvme) { + if (NVME_TARGET(vha->hw, fcport)) { current_login_state = pd->current_login_state >> 4; last_login_state = pd->last_login_state >> 4; } else { @@@ -6397,8 -6393,8 +6394,8 @@@ fcport->d_id.b.al_pa = pd->port_id[2]; fcport->d_id.b.rsvd_1 = 0;
- if (fcport->fc4f_nvme) { - fcport->port_type = 0; + if (NVME_TARGET(vha->hw, fcport)) { + fcport->port_type = FCT_NVME; if ((pd->prli_svc_param_word_3[0] & BIT_5) == 0) fcport->port_type |= FCT_NVME_INITIATOR; if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) diff --combined drivers/scsi/qla2xxx/qla_mid.c index 238240984bc1,bd62c4595b73..eabc5127174e --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@@ -76,11 -76,9 +76,11 @@@ qla24xx_deallocate_vp_id(scsi_qla_host_ * ensures no active vp_list traversal while the vport is removed * from the queue) */ - for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++) - wait_event_timeout(vha->vref_waitq, - atomic_read(&vha->vref_count), HZ); + for (i = 0; i < 10; i++) { + if (wait_event_timeout(vha->vref_waitq, + !atomic_read(&vha->vref_count), HZ) > 0) + break; + }
spin_lock_irqsave(&ha->vport_slock, flags); if (atomic_read(&vha->vref_count)) { @@@ -946,7 -944,7 +946,7 @@@ int qla24xx_control_vp(scsi_qla_host_t
sp = qla2x00_get_sp(base_vha, NULL, GFP_KERNEL); if (!sp) - goto done; + return rval;
sp->type = SRB_CTRL_VP; sp->name = "ctrl_vp"; @@@ -962,7 -960,7 +962,7 @@@ ql_dbg(ql_dbg_async, vha, 0xffff, "%s: %s Failed submission. %x.\n", __func__, sp->name, rval); - goto done_free_sp; + goto done; }
ql_dbg(ql_dbg_vport, vha, 0x113f, "%s hndl %x submitted\n", @@@ -980,16 -978,13 +980,13 @@@ case QLA_SUCCESS: ql_dbg(ql_dbg_vport, vha, 0xffff, "%s: %s done.\n", __func__, sp->name); - goto done_free_sp; + break; default: ql_dbg(ql_dbg_vport, vha, 0xffff, "%s: %s Failed. %x.\n", __func__, sp->name, rval); - goto done_free_sp; + break; } done: - return rval; - - done_free_sp: sp->free(sp); return rval; } diff --combined drivers/scsi/qla2xxx/qla_os.c index 726ad4cbf4a6,2450ba933bb2..8b84bc4a6ac8 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@@ -698,11 -698,6 +698,6 @@@ void qla2x00_sp_compl(srb_t *sp, int re struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct completion *comp = sp->comp;
- if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0)) - return; - - atomic_dec(&sp->ref_count); - sp->free(sp); cmd->result = res; CMD_SP(cmd) = NULL; @@@ -794,11 -789,6 +789,6 @@@ void qla2xxx_qpair_sp_compl(srb_t *sp, struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct completion *comp = sp->comp;
- if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0)) - return; - - atomic_dec(&sp->ref_count); - sp->free(sp); cmd->result = res; CMD_SP(cmd) = NULL; @@@ -903,7 -893,7 +893,7 @@@ qla2xxx_queuecommand(struct Scsi_Host *
sp->u.scmd.cmd = cmd; sp->type = SRB_SCSI_CMD; - atomic_set(&sp->ref_count, 1); + CMD_SP(cmd) = (void *)sp; sp->free = qla2x00_sp_free_dma; sp->done = qla2x00_sp_compl; @@@ -985,18 -975,16 +975,16 @@@ qla2xxx_mqueuecommand(struct Scsi_Host
sp->u.scmd.cmd = cmd; sp->type = SRB_SCSI_CMD; - atomic_set(&sp->ref_count, 1); CMD_SP(cmd) = (void *)sp; sp->free = qla2xxx_qpair_sp_free_dma; sp->done = qla2xxx_qpair_sp_compl; - sp->qpair = qpair;
rval = ha->isp_ops->start_scsi_mq(sp); if (rval != QLA_SUCCESS) { ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3078, "Start scsi failed rval=%d for cmd=%p.\n", rval, cmd); if (rval == QLA_INTERFACE_ERROR) - goto qc24_fail_command; + goto qc24_free_sp_fail_command; goto qc24_host_busy_free_sp; }
@@@ -1008,6 -996,11 +996,11 @@@ qc24_host_busy_free_sp qc24_target_busy: return SCSI_MLQUEUE_TARGET_BUSY;
+ qc24_free_sp_fail_command: + sp->free(sp); + CMD_SP(cmd) = NULL; + qla2xxx_rel_qpair_sp(sp->qpair, sp); + qc24_fail_command: cmd->scsi_done(cmd);
@@@ -1119,11 -1112,9 +1112,11 @@@ qla2x00_wait_for_sess_deletion(scsi_qla
qla2x00_mark_all_devices_lost(vha, 0);
- for (i = 0; i < 10; i++) - wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), - HZ); + for (i = 0; i < 10; i++) { + if (wait_event_timeout(vha->fcport_waitQ, + test_fcport_count(vha), HZ) > 0) + break; + }
flush_workqueue(vha->hw->wq); } @@@ -1184,16 -1175,6 +1177,6 @@@ qla2x00_wait_for_chip_reset(scsi_qla_ho return return_status; }
- static int - sp_get(struct srb *sp) - { - if (!refcount_inc_not_zero((refcount_t *)&sp->ref_count)) - /* kref get fail */ - return ENXIO; - else - return 0; - } - #define ISP_REG_DISCONNECT 0xffffffffU /************************************************************************** * qla2x00_isp_reg_stat @@@ -1249,6 -1230,9 +1232,9 @@@ qla2xxx_eh_abort(struct scsi_cmnd *cmd uint64_t lun; int rval; struct qla_hw_data *ha = vha->hw; + uint32_t ratov_j; + struct qla_qpair *qpair; + unsigned long flags;
if (qla2x00_isp_reg_stat(ha)) { ql_log(ql_log_info, vha, 0x8042, @@@ -1261,13 -1245,26 +1247,26 @@@ return ret;
sp = scsi_cmd_priv(cmd); + qpair = sp->qpair;
- if (sp->fcport && sp->fcport->deleted) + if ((sp->fcport && sp->fcport->deleted) || !qpair) return SUCCESS;
- /* Return if the command has already finished. */ - if (sp_get(sp)) + spin_lock_irqsave(qpair->qp_lock_ptr, flags); + if (sp->completed) { + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); return SUCCESS; + } + + if (sp->abort || sp->aborted) { + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + return FAILED; + } + + sp->abort = 1; + sp->comp = ∁ + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); +
id = cmd->device->id; lun = cmd->device->lun; @@@ -1276,47 -1273,37 +1275,37 @@@ "Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n", vha->host_no, id, lun, sp, cmd, sp->handle);
+ /* + * Abort will release the original Command/sp from FW. Let the + * original command call scsi_done. In return, he will wakeup + * this sleeping thread. + */ rval = ha->isp_ops->abort_command(sp); + ql_dbg(ql_dbg_taskm, vha, 0x8003, "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
+ /* Wait for the command completion. */ + ratov_j = ha->r_a_tov/10 * 4 * 1000; + ratov_j = msecs_to_jiffies(ratov_j); switch (rval) { case QLA_SUCCESS: - /* - * The command has been aborted. That means that the firmware - * won't report a completion. - */ - sp->done(sp, DID_ABORT << 16); - ret = SUCCESS; - break; - case QLA_FUNCTION_PARAMETER_ERROR: { - /* Wait for the command completion. */ - uint32_t ratov = ha->r_a_tov/10; - uint32_t ratov_j = msecs_to_jiffies(4 * ratov * 1000); - - WARN_ON_ONCE(sp->comp); - sp->comp = ∁ if (!wait_for_completion_timeout(&comp, ratov_j)) { ql_dbg(ql_dbg_taskm, vha, 0xffff, "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n", - __func__, ha->r_a_tov); + __func__, ha->r_a_tov/10); ret = FAILED; } else { ret = SUCCESS; } break; - } default: - /* - * Either abort failed or abort and completion raced. Let - * the SCSI core retry the abort in the former case. - */ ret = FAILED; break; }
sp->comp = NULL; - atomic_dec(&sp->ref_count); + ql_log(ql_log_info, vha, 0x801c, "Abort command issued nexus=%ld:%d:%llu -- %x.\n", vha->host_no, id, lun, ret); @@@ -1708,32 -1695,53 +1697,53 @@@ static void qla2x00_abort_srb(struct ql scsi_qla_host_t *vha = qp->vha; struct qla_hw_data *ha = vha->hw; int rval; + bool ret_cmd; + uint32_t ratov_j;
- if (sp_get(sp)) + if (qla2x00_chip_is_down(vha)) { + sp->done(sp, res); return; + }
if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS || (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy && !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) && !qla2x00_isp_reg_stat(ha))) { + if (sp->comp) { + sp->done(sp, res); + return; + } + sp->comp = ∁ + sp->abort = 1; spin_unlock_irqrestore(qp->qp_lock_ptr, *flags); - rval = ha->isp_ops->abort_command(sp);
+ rval = ha->isp_ops->abort_command(sp); + /* Wait for command completion. */ + ret_cmd = false; + ratov_j = ha->r_a_tov/10 * 4 * 1000; + ratov_j = msecs_to_jiffies(ratov_j); switch (rval) { case QLA_SUCCESS: - sp->done(sp, res); + if (wait_for_completion_timeout(&comp, ratov_j)) { + ql_dbg(ql_dbg_taskm, vha, 0xffff, + "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n", + __func__, ha->r_a_tov/10); + ret_cmd = true; + } + /* else FW return SP to driver */ break; - case QLA_FUNCTION_PARAMETER_ERROR: - wait_for_completion(&comp); + default: + ret_cmd = true; break; }
spin_lock_irqsave(qp->qp_lock_ptr, *flags); - sp->comp = NULL; + if (ret_cmd && (!sp->completed || !sp->aborted)) + sp->done(sp, res); + } else { + sp->done(sp, res); } - - atomic_dec(&sp->ref_count); }
static void @@@ -1755,7 -1763,6 +1765,6 @@@ __qla2x00_abort_all_cmds(struct qla_qpa for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { - req->outstanding_cmds[cnt] = NULL; switch (sp->cmd_type) { case TYPE_SRB: qla2x00_abort_srb(qp, sp, res, &flags); @@@ -1777,6 -1784,7 +1786,7 @@@ default: break; } + req->outstanding_cmds[cnt] = NULL; } } spin_unlock_irqrestore(qp->qp_lock_ptr, flags); @@@ -3226,10 -3234,6 +3236,10 @@@ qla2x00_probe_one(struct pci_dev *pdev req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out);
ha->wq = alloc_workqueue("qla2xxx_wq", 0, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + }
if (ha->isp_ops->initialize_adapter(base_vha)) { ql_log(ql_log_fatal, base_vha, 0x00d6, @@@ -3492,6 -3496,29 +3502,29 @@@ disable_device return ret; }
+ static void __qla_set_remove_flag(scsi_qla_host_t *base_vha) + { + scsi_qla_host_t *vp; + unsigned long flags; + struct qla_hw_data *ha; + + if (!base_vha) + return; + + ha = base_vha->hw; + + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) + set_bit(PFLG_DRIVER_REMOVING, &vp->pci_flags); + + /* + * Indicate device removal to prevent future board_disable + * and wait until any pending board_disable has completed. + */ + set_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } + static void qla2x00_shutdown(struct pci_dev *pdev) { @@@ -3508,7 -3535,7 +3541,7 @@@ * Prevent future board_disable and wait * until any pending board_disable has completed. */ - set_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags); + __qla_set_remove_flag(vha); cancel_work_sync(&ha->board_disable);
if (!atomic_read(&pdev->enable_cnt)) @@@ -3537,10 -3564,6 +3570,10 @@@ qla2x00_try_to_stop_firmware(vha); }
+ /* Disable timer */ + if (vha->timer_active) + qla2x00_stop_timer(vha); + /* Turn adapter off line */ vha->flags.online = 0;
@@@ -3668,10 -3691,7 +3701,7 @@@ qla2x00_remove_one(struct pci_dev *pdev ha = base_vha->hw; ql_log(ql_log_info, base_vha, 0xb079, "Removing driver\n"); - - /* Indicate device removal to prevent future board_disable and wait - * until any pending board_disable has completed. */ - set_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags); + __qla_set_remove_flag(base_vha); cancel_work_sync(&ha->board_disable);
/* @@@ -4666,7 -4686,8 +4696,8 @@@ qla2x00_mem_free(struct qla_hw_data *ha ha->sfp_data = NULL;
if (ha->flt) - dma_free_coherent(&ha->pdev->dev, SFP_DEV_SIZE, + dma_free_coherent(&ha->pdev->dev, + sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE, ha->flt, ha->flt_dma); ha->flt = NULL; ha->flt_dma = 0; @@@ -5042,19 -5063,17 +5073,17 @@@ void qla24xx_create_new_sess(struct scs fcport->d_id = e->u.new_sess.id; fcport->flags |= FCF_FABRIC_DEVICE; fcport->fw_login_state = DSC_LS_PLOGI_PEND; - if (e->u.new_sess.fc4_type == FS_FC4TYPE_FCP) - fcport->fc4_type = FC4_TYPE_FCP_SCSI; - - if (e->u.new_sess.fc4_type == FS_FC4TYPE_NVME) { - fcport->fc4_type = FC4_TYPE_OTHER; - fcport->fc4f_nvme = FC4_TYPE_NVME; - }
memcpy(fcport->port_name, e->u.new_sess.port_name, WWN_SIZE);
- if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) + fcport->fc4_type = e->u.new_sess.fc4_type; + if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) { + fcport->fc4_type = FS_FC4TYPE_FCP; fcport->n2n_flag = 1; + if (vha->flags.nvme_enabled) + fcport->fc4_type |= FS_FC4TYPE_NVME; + }
} else { ql_dbg(ql_dbg_disc, vha, 0xffff, @@@ -5158,7 -5177,8 +5187,8 @@@ fcport->flags &= ~FCF_FABRIC_DEVICE; fcport->keep_nport_handle = 1; if (vha->flags.nvme_enabled) { - fcport->fc4f_nvme = 1; + fcport->fc4_type = + (FS_FC4TYPE_NVME | FS_FC4TYPE_FCP); fcport->n2n_flag = 1; } fcport->fw_login_state = 0; diff --combined drivers/scsi/scsi.c index 7a1b6c76f263,adfe8b3693d5..930e4803d888 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@@ -186,7 -186,7 +186,7 @@@ void scsi_finish_command(struct scsi_cm struct scsi_driver *drv; unsigned int good_bytes;
- scsi_device_unbusy(sdev); + scsi_device_unbusy(sdev, cmd);
/* * Clear the flags that say that the device/target/host is no longer @@@ -434,8 -434,8 +434,8 @@@ static void scsi_update_vpd_page(struc return;
mutex_lock(&sdev->inquiry_mutex); - rcu_swap_protected(*sdev_vpd_buf, vpd_buf, - lockdep_is_held(&sdev->inquiry_mutex)); + vpd_buf = rcu_replace_pointer(*sdev_vpd_buf, vpd_buf, + lockdep_is_held(&sdev->inquiry_mutex)); mutex_unlock(&sdev->inquiry_mutex);
if (vpd_buf) @@@ -465,10 -465,14 +465,14 @@@ void scsi_attach_vpd(struct scsi_devic return;
for (i = 4; i < vpd_buf->len; i++) { + if (vpd_buf->data[i] == 0x0) + scsi_update_vpd_page(sdev, 0x0, &sdev->vpd_pg0); if (vpd_buf->data[i] == 0x80) scsi_update_vpd_page(sdev, 0x80, &sdev->vpd_pg80); if (vpd_buf->data[i] == 0x83) scsi_update_vpd_page(sdev, 0x83, &sdev->vpd_pg83); + if (vpd_buf->data[i] == 0x89) + scsi_update_vpd_page(sdev, 0x89, &sdev->vpd_pg89); } kfree(vpd_buf); } diff --combined drivers/scsi/scsi_lib.c index 91c007d26c1e,2563b061f56b..3e7a45d0daca --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@@ -189,7 -189,7 +189,7 @@@ static void __scsi_queue_insert(struct * active on the host/device. */ if (unbusy) - scsi_device_unbusy(device); + scsi_device_unbusy(device, cmd);
/* * Requeue this command. It will go before all other commands @@@ -321,20 -321,20 +321,20 @@@ static void scsi_init_cmd_errh(struct s }
/* - * Decrement the host_busy counter and wake up the error handler if necessary. - * Avoid as follows that the error handler is not woken up if shost->host_busy - * == shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination - * with an RCU read lock in this function to ensure that this function in its - * entirety either finishes before scsi_eh_scmd_add() increases the + * Wake up the error handler if necessary. Avoid as follows that the error + * handler is not woken up if host in-flight requests number == + * shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination + * with an RCU read lock in this function to ensure that this function in + * its entirety either finishes before scsi_eh_scmd_add() increases the * host_failed counter or that it notices the shost state change made by * scsi_eh_scmd_add(). */ - static void scsi_dec_host_busy(struct Scsi_Host *shost) + static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { unsigned long flags;
rcu_read_lock(); - atomic_dec(&shost->host_busy); + __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) @@@ -344,12 -344,12 +344,12 @@@ rcu_read_unlock(); }
- void scsi_device_unbusy(struct scsi_device *sdev) + void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd) { struct Scsi_Host *shost = sdev->host; struct scsi_target *starget = scsi_target(sdev);
- scsi_dec_host_busy(shost); + scsi_dec_host_busy(shost, cmd);
if (starget->can_queue > 0) atomic_dec(&starget->target_busy); @@@ -430,9 -430,6 +430,6 @@@ static inline bool scsi_target_is_busy(
static inline bool scsi_host_is_busy(struct Scsi_Host *shost) { - if (shost->can_queue > 0 && - atomic_read(&shost->host_busy) >= shost->can_queue) - return true; if (atomic_read(&shost->host_blocked) > 0) return true; if (shost->host_self_blocked) @@@ -1139,6 -1136,7 +1136,7 @@@ void scsi_init_command(struct scsi_devi unsigned int flags = cmd->flags & SCMD_PRESERVED_FLAGS; unsigned long jiffies_at_alloc; int retries; + bool in_flight;
if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) { flags |= SCMD_INITIALIZED; @@@ -1147,6 -1145,7 +1145,7 @@@
jiffies_at_alloc = cmd->jiffies_at_alloc; retries = cmd->retries; + in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state); /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size); @@@ -1158,6 -1157,8 +1157,8 @@@ INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); cmd->jiffies_at_alloc = jiffies_at_alloc; cmd->retries = retries; + if (in_flight) + __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
scsi_add_cmd_to_list(cmd); } @@@ -1367,16 -1368,14 +1368,14 @@@ out_dec */ static inline int scsi_host_queue_ready(struct request_queue *q, struct Scsi_Host *shost, - struct scsi_device *sdev) + struct scsi_device *sdev, + struct scsi_cmnd *cmd) { - unsigned int busy; - if (scsi_host_in_recovery(shost)) return 0;
- busy = atomic_inc_return(&shost->host_busy) - 1; if (atomic_read(&shost->host_blocked) > 0) { - if (busy) + if (scsi_host_busy(shost) > 0) goto starved;
/* @@@ -1390,8 -1389,6 +1389,6 @@@ "unblocking host at zero depth\n")); }
- if (shost->can_queue > 0 && busy >= shost->can_queue) - goto starved; if (shost->host_self_blocked) goto starved;
@@@ -1403,6 -1400,8 +1400,8 @@@ spin_unlock_irq(shost->host_lock); }
+ __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); + return 1;
starved: @@@ -1411,7 -1410,7 +1410,7 @@@ list_add_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: - scsi_dec_host_busy(shost); + scsi_dec_host_busy(shost, cmd); return 0; }
@@@ -1665,7 -1664,7 +1664,7 @@@ static blk_status_t scsi_queue_rq(struc ret = BLK_STS_RESOURCE; if (!scsi_target_queue_ready(shost, sdev)) goto out_put_budget; - if (!scsi_host_queue_ready(q, shost, sdev)) + if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy;
if (!(req->rq_flags & RQF_DONTPREP)) { @@@ -1697,7 -1696,7 +1696,7 @@@ return BLK_STS_OK;
out_dec_host_busy: - scsi_dec_host_busy(shost); + scsi_dec_host_busy(shost, cmd); out_dec_target_busy: if (scsi_target(sdev)->can_queue > 0) atomic_dec(&scsi_target(sdev)->target_busy); @@@ -1834,7 -1833,6 +1833,7 @@@ static const struct blk_mq_ops scsi_mq_ .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .initialize_rq_fn = scsi_initialize_rq, + .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, }; @@@ -1883,8 -1881,7 +1882,8 @@@ int scsi_mq_setup_tags(struct Scsi_Hos { unsigned int cmd_size, sgl_size;
- sgl_size = scsi_mq_inline_sgl_size(shost); + sgl_size = max_t(unsigned int, sizeof(struct scatterlist), + scsi_mq_inline_sgl_size(shost)); cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; if (scsi_host_get_prot(shost)) cmd_size += sizeof(struct scsi_data_buffer) + @@@ -1923,8 -1920,7 +1922,8 @@@ struct scsi_device *scsi_device_from_qu { struct scsi_device *sdev = NULL;
- if (q->mq_ops == &scsi_mq_ops) + if (q->mq_ops == &scsi_mq_ops_no_commit || + q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; diff --combined drivers/scsi/scsi_sysfs.c index cc51f4756077,2c76d7a43f67..677b5c5403d2 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@@ -437,6 -437,7 +437,7 @@@ static void scsi_device_dev_release_use struct device *parent; struct list_head *this, *tmp; struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; + struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; unsigned long flags;
sdev = container_of(work, struct scsi_device, ew.work); @@@ -466,16 -467,24 +467,24 @@@ sdev->request_queue = NULL;
mutex_lock(&sdev->inquiry_mutex); - rcu_swap_protected(sdev->vpd_pg0, vpd_pg0, - lockdep_is_held(&sdev->inquiry_mutex)); - rcu_swap_protected(sdev->vpd_pg80, vpd_pg80, - lockdep_is_held(&sdev->inquiry_mutex)); - rcu_swap_protected(sdev->vpd_pg83, vpd_pg83, - lockdep_is_held(&sdev->inquiry_mutex)); - rcu_swap_protected(sdev->vpd_pg89, vpd_pg89, - lockdep_is_held(&sdev->inquiry_mutex)); ++ vpd_pg0 = rcu_replace_pointer(sdev->vpd_pg0, vpd_pg0, ++ lockdep_is_held(&sdev->inquiry_mutex)); + vpd_pg80 = rcu_replace_pointer(sdev->vpd_pg80, vpd_pg80, + lockdep_is_held(&sdev->inquiry_mutex)); + vpd_pg83 = rcu_replace_pointer(sdev->vpd_pg83, vpd_pg83, + lockdep_is_held(&sdev->inquiry_mutex)); ++ vpd_pg89 = rcu_replace_pointer(sdev->vpd_pg89, vpd_pg89, ++ lockdep_is_held(&sdev->inquiry_mutex)); mutex_unlock(&sdev->inquiry_mutex);
+ if (vpd_pg0) + kfree_rcu(vpd_pg0, rcu); if (vpd_pg83) kfree_rcu(vpd_pg83, rcu); if (vpd_pg80) kfree_rcu(vpd_pg80, rcu); + if (vpd_pg89) + kfree_rcu(vpd_pg89, rcu); kfree(sdev->inquiry); kfree(sdev);
@@@ -730,14 -739,6 +739,14 @@@ sdev_store_delete(struct device *dev, s const char *buf, size_t count) { struct kernfs_node *kn; + struct scsi_device *sdev = to_scsi_device(dev); + + /* + * We need to try to get module, avoiding the module been removed + * during delete. + */ + if (scsi_device_get(sdev)) + return -ENODEV;
kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); @@@ -752,10 -753,9 +761,10 @@@ * state into SDEV_DEL. */ device_remove_file(dev, attr); - scsi_remove_device(to_scsi_device(dev)); + scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); + scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); @@@ -868,6 -868,8 +877,8 @@@ static struct bin_attribute dev_attr_vp
sdev_vpd_pg_attr(pg83); sdev_vpd_pg_attr(pg80); + sdev_vpd_pg_attr(pg89); + sdev_vpd_pg_attr(pg0);
static ssize_t show_inquiry(struct file *filep, struct kobject *kobj, struct bin_attribute *bin_attr, @@@ -1200,12 -1202,18 +1211,18 @@@ static umode_t scsi_sdev_bin_attr_is_vi struct scsi_device *sdev = to_scsi_device(dev);
+ if (attr == &dev_attr_vpd_pg0 && !sdev->vpd_pg0) + return 0; + if (attr == &dev_attr_vpd_pg80 && !sdev->vpd_pg80) return 0;
if (attr == &dev_attr_vpd_pg83 && !sdev->vpd_pg83) return 0;
+ if (attr == &dev_attr_vpd_pg89 && !sdev->vpd_pg89) + return 0; + return S_IRUGO; }
@@@ -1248,8 -1256,10 +1265,10 @@@ static struct attribute *scsi_sdev_attr };
static struct bin_attribute *scsi_sdev_bin_attrs[] = { + &dev_attr_vpd_pg0, &dev_attr_vpd_pg83, &dev_attr_vpd_pg80, + &dev_attr_vpd_pg89, &dev_attr_inquiry, NULL }; @@@ -1309,7 -1319,8 +1328,8 @@@ int scsi_sysfs_add_sdev(struct scsi_dev device_enable_async_suspend(&sdev->sdev_gendev); scsi_autopm_get_target(starget); pm_runtime_set_active(&sdev->sdev_gendev); - pm_runtime_forbid(&sdev->sdev_gendev); + if (!sdev->rpm_autosuspend) + pm_runtime_forbid(&sdev->sdev_gendev); pm_runtime_enable(&sdev->sdev_gendev); scsi_autopm_put_target(starget);
diff --combined drivers/scsi/sd.c index 13925021473d,326e2877f169..7dc17821f873 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@@ -1166,12 -1166,11 +1166,12 @@@ static blk_status_t sd_setup_read_write sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq)); sector_t threshold; unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); - bool dif, dix; unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; blk_status_t ret; + unsigned int dif; + bool dix;
ret = scsi_init_io(cmd); if (ret != BLK_STS_OK) @@@ -1291,17 -1290,9 +1291,17 @@@ static blk_status_t sd_init_command(str case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); case REQ_OP_ZONE_RESET: - return sd_zbc_setup_reset_cmnd(cmd, false); + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, + false); case REQ_OP_ZONE_RESET_ALL: - return sd_zbc_setup_reset_cmnd(cmd, true); + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, + true); + case REQ_OP_ZONE_OPEN: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false); + case REQ_OP_ZONE_CLOSE: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false); + case REQ_OP_ZONE_FINISH: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false); default: WARN_ON_ONCE(1); return BLK_STS_NOTSUPP; @@@ -1702,30 -1693,20 +1702,30 @@@ static void sd_rescan(struct device *de static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device; + struct gendisk *disk = bdev->bd_disk; + struct scsi_disk *sdkp = scsi_disk(disk); + struct scsi_device *sdev = sdkp->device; + void __user *p = compat_ptr(arg); int error;
+ error = scsi_verify_blk_ioctl(bdev, cmd); + if (error < 0) + return error; + error = scsi_ioctl_block_when_processing_errors(sdev, cmd, (mode & FMODE_NDELAY) != 0); if (error) return error; + + if (is_sed_ioctl(cmd)) + return sed_ioctl(sdkp->opal_dev, cmd, p); /* * Let the static ioctl translation table take care of it. */ if (!sdev->host->hostt->compat_ioctl) return -ENOIOCTLCMD; - return sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg); + return sdev->host->hostt->compat_ioctl(sdev, cmd, p); } #endif
@@@ -1979,9 -1960,6 +1979,9 @@@ static int sd_done(struct scsi_cmnd *SC case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!result) { good_bytes = blk_rq_bytes(req); scsi_set_resid(SCpnt, 0); @@@ -3390,6 -3368,10 +3390,10 @@@ static int sd_probe(struct device *dev }
blk_pm_runtime_init(sdp->request_queue, dev); + if (sdp->rpm_autosuspend) { + pm_runtime_set_autosuspend_delay(dev, + sdp->host->hostt->rpm_autosuspend_delay); + } device_add_disk(dev, gd, NULL); if (sdkp->capacity) sd_dif_config_host(sdkp); diff --combined drivers/scsi/sg.c index 9e4ef22b3579,0940abd91d3c..160748ad9c0f --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@@ -429,28 -429,14 +429,36 @@@ sg_read(struct file *filp, char __user SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_read: count=%d\n", (int) count));
- if (!access_ok(buf, count)) - return -EFAULT; if (sfp->force_packid && (count >= SZ_SG_HEADER)) { - old_hdr = kmalloc(SZ_SG_HEADER, GFP_KERNEL); - if (!old_hdr) - return -ENOMEM; - if (__copy_from_user(old_hdr, buf, SZ_SG_HEADER)) { - retval = -EFAULT; - goto free_old_hdr; - } + old_hdr = memdup_user(buf, SZ_SG_HEADER); + if (IS_ERR(old_hdr)) + return PTR_ERR(old_hdr); if (old_hdr->reply_len < 0) { if (count >= SZ_SG_IO_HDR) { - sg_io_hdr_t __user *p = (void __user *)buf; - if (get_user(req_pack_id, &p->pack_id)) { ++ /* ++ * This is stupid. ++ * ++ * We're copying the whole sg_io_hdr_t from user ++ * space just to get the 'pack_id' field. But the ++ * field is at different offsets for the compat ++ * case, so we'll use "get_sg_io_hdr()" to copy ++ * the whole thing and convert it. ++ * ++ * We could do something like just calculating the ++ * offset based of 'in_compat_syscall()', but the ++ * 'compat_sg_io_hdr' definition is in the wrong ++ * place for that. ++ */ + sg_io_hdr_t *new_hdr; + new_hdr = kmalloc(SZ_SG_IO_HDR, GFP_KERNEL); + if (!new_hdr) { + retval = -ENOMEM; + goto free_old_hdr; + } + retval = get_sg_io_hdr(new_hdr, buf); + req_pack_id = new_hdr->pack_id; + kfree(new_hdr); + if (retval) { retval = -EFAULT; goto free_old_hdr; } @@@ -537,7 -523,7 +545,7 @@@
/* Now copy the result back to the user buffer. */ if (count >= SZ_SG_HEADER) { - if (__copy_to_user(buf, old_hdr, SZ_SG_HEADER)) { + if (copy_to_user(buf, old_hdr, SZ_SG_HEADER)) { retval = -EFAULT; goto free_old_hdr; } @@@ -588,7 -574,10 +596,7 @@@ sg_new_read(Sg_fd * sfp, char __user *b } if (hp->masked_status || hp->host_status || hp->driver_status) hp->info |= SG_INFO_CHECK; - if (copy_to_user(buf, hp, SZ_SG_IO_HDR)) { - err = -EFAULT; - goto err_out; - } + err = put_sg_io_hdr(hp, buf); err_out: err2 = sg_finish_rem_req(srp); sg_remove_request(sfp, srp); @@@ -623,11 -612,9 +631,9 @@@ sg_write(struct file *filp, const char scsi_block_when_processing_errors(sdp->device))) return -ENXIO;
- if (!access_ok(buf, count)) - return -EFAULT; /* protects following copy_from_user()s + get_user()s */ if (count < SZ_SG_HEADER) return -EIO; - if (__copy_from_user(&old_hdr, buf, SZ_SG_HEADER)) + if (copy_from_user(&old_hdr, buf, SZ_SG_HEADER)) return -EFAULT; blocking = !(filp->f_flags & O_NONBLOCK); if (old_hdr.reply_len < 0) @@@ -636,13 -623,15 +642,15 @@@ if (count < (SZ_SG_HEADER + 6)) return -EIO; /* The minimum scsi command length is 6 bytes. */
+ buf += SZ_SG_HEADER; + if (get_user(opcode, buf)) + return -EFAULT; + if (!(srp = sg_add_request(sfp))) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sdp, "sg_write: queue full\n")); return -EDOM; } - buf += SZ_SG_HEADER; - __get_user(opcode, buf); mutex_lock(&sfp->f_mutex); if (sfp->next_cmd_len > 0) { cmd_size = sfp->next_cmd_len; @@@ -685,7 -674,7 +693,7 @@@ hp->flags = input_size; /* structure abuse ... */ hp->pack_id = old_hdr.pack_id; hp->usr_ptr = NULL; - if (__copy_from_user(cmnd, buf, cmd_size)) + if (copy_from_user(cmnd, buf, cmd_size)) return -EFAULT; /* * SG_DXFER_TO_FROM_DEV is functionally equivalent to SG_DXFER_FROM_DEV, @@@ -720,8 -709,6 +728,6 @@@ sg_new_write(Sg_fd *sfp, struct file *f
if (count < SZ_SG_IO_HDR) return -EINVAL; - if (!access_ok(buf, count)) - return -EFAULT; /* protects following copy_from_user()s + get_user()s */
sfp->cmd_q = 1; /* when sg_io_hdr seen, set command queuing on */ if (!(srp = sg_add_request(sfp))) { @@@ -731,7 -718,7 +737,7 @@@ } srp->sg_io_owned = sg_io_owned; hp = &srp->header; - if (copy_from_user(hp, buf, SZ_SG_IO_HDR)) { + if (get_sg_io_hdr(hp, buf)) { sg_remove_request(sfp, srp); return -EFAULT; } @@@ -759,11 -746,7 +765,7 @@@ sg_remove_request(sfp, srp); return -EMSGSIZE; } - if (!access_ok(hp->cmdp, hp->cmd_len)) { - sg_remove_request(sfp, srp); - return -EFAULT; /* protects following copy_from_user()s + get_user()s */ - } - if (__copy_from_user(cmnd, hp->cmdp, hp->cmd_len)) { + if (copy_from_user(cmnd, hp->cmdp, hp->cmd_len)) { sg_remove_request(sfp, srp); return -EFAULT; } @@@ -889,33 -872,6 +891,33 @@@ sg_fill_request_table(Sg_fd *sfp, sg_re } }
+#ifdef CONFIG_COMPAT +struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ + char req_state; + char orphan; + char sg_io_owned; + char problem; + int pack_id; + compat_uptr_t usr_ptr; + unsigned int duration; + int unused; +}; + +static int put_compat_request_table(struct compat_sg_req_info __user *o, + struct sg_req_info *rinfo) +{ + int i; + for (i = 0; i < SG_MAX_QUEUE; i++) { + if (copy_to_user(o + i, rinfo + i, offsetof(sg_req_info_t, usr_ptr)) || + put_user((uintptr_t)rinfo[i].usr_ptr, &o[i].usr_ptr) || + put_user(rinfo[i].duration, &o[i].duration) || + put_user(rinfo[i].unused, &o[i].unused)) + return -EFAULT; + } + return 0; +} +#endif + static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { @@@ -940,8 -896,6 +942,6 @@@ return -ENODEV; if (!scsi_block_when_processing_errors(sdp->device)) return -ENXIO; - if (!access_ok(p, SZ_SG_IO_HDR)) - return -EFAULT; result = sg_new_write(sfp, filp, p, SZ_SG_IO_HDR, 1, read_only, 1, &srp); if (result < 0) @@@ -986,26 -940,21 +986,21 @@@ case SG_GET_LOW_DMA: return put_user((int) sdp->device->host->unchecked_isa_dma, ip); case SG_GET_SCSI_ID: - if (!access_ok(p, sizeof (sg_scsi_id_t))) - return -EFAULT; - else { - sg_scsi_id_t __user *sg_idp = p; + { + sg_scsi_id_t v;
if (atomic_read(&sdp->detaching)) return -ENODEV; - __put_user((int) sdp->device->host->host_no, - &sg_idp->host_no); - __put_user((int) sdp->device->channel, - &sg_idp->channel); - __put_user((int) sdp->device->id, &sg_idp->scsi_id); - __put_user((int) sdp->device->lun, &sg_idp->lun); - __put_user((int) sdp->device->type, &sg_idp->scsi_type); - __put_user((short) sdp->device->host->cmd_per_lun, - &sg_idp->h_cmd_per_lun); - __put_user((short) sdp->device->queue_depth, - &sg_idp->d_queue_depth); - __put_user(0, &sg_idp->unused[0]); - __put_user(0, &sg_idp->unused[1]); + memset(&v, 0, sizeof(v)); + v.host_no = sdp->device->host->host_no; + v.channel = sdp->device->channel; + v.scsi_id = sdp->device->id; + v.lun = sdp->device->lun; + v.scsi_type = sdp->device->type; + v.h_cmd_per_lun = sdp->device->host->cmd_per_lun; + v.d_queue_depth = sdp->device->queue_depth; + if (copy_to_user(p, &v, sizeof(sg_scsi_id_t))) + return -EFAULT; return 0; } case SG_SET_FORCE_PACK_ID: @@@ -1015,20 -964,16 +1010,16 @@@ sfp->force_packid = val ? 1 : 0; return 0; case SG_GET_PACK_ID: - if (!access_ok(ip, sizeof (int))) - return -EFAULT; read_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) { read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - __put_user(srp->header.pack_id, ip); - return 0; + return put_user(srp->header.pack_id, ip); } } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - __put_user(-1, ip); - return 0; + return put_user(-1, ip); case SG_GET_NUM_WAITING: read_lock_irqsave(&sfp->rq_list_lock, iflags); val = 0; @@@ -1106,13 -1051,8 +1097,13 @@@ read_lock_irqsave(&sfp->rq_list_lock, iflags); sg_fill_request_table(sfp, rinfo); read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - result = copy_to_user(p, rinfo, - SZ_SG_REQ_INFO * SG_MAX_QUEUE); + #ifdef CONFIG_COMPAT + if (in_compat_syscall()) + result = put_compat_request_table(p, rinfo); + else + #endif + result = copy_to_user(p, rinfo, + SZ_SG_REQ_INFO * SG_MAX_QUEUE); result = result ? -EFAULT : 0; kfree(rinfo); return result; @@@ -1823,14 -1763,7 +1814,14 @@@ sg_start_req(Sg_request *srp, unsigned struct iovec *iov = NULL; struct iov_iter i;
- res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + res = compat_import_iovec(rw, hp->dxferp, iov_count, + 0, &iov, &i); + else +#endif + res = import_iovec(rw, hp->dxferp, iov_count, + 0, &iov, &i); if (res < 0) return res;
@@@ -2017,12 -1950,12 +2008,12 @@@ sg_read_oxfer(Sg_request * srp, char __ num = 1 << (PAGE_SHIFT + schp->page_order); for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { - if (__copy_to_user(outp, page_address(schp->pages[k]), + if (copy_to_user(outp, page_address(schp->pages[k]), num_read_xfer)) return -EFAULT; break; } else { - if (__copy_to_user(outp, page_address(schp->pages[k]), + if (copy_to_user(outp, page_address(schp->pages[k]), num)) return -EFAULT; num_read_xfer -= num; diff --combined drivers/scsi/ufs/ufs_bsg.c index dc2f6d2b46ed,3a2e68f1ad42..baeecee35d1e --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@@ -98,8 -98,6 +98,8 @@@ static int ufs_bsg_request(struct bsg_j
bsg_reply->reply_payload_rcv_len = 0;
+ pm_runtime_get_sync(hba->dev); + msgcode = bsg_request->msgcode; switch (msgcode) { case UPIU_TRANSACTION_QUERY_REQ: @@@ -137,8 -135,6 +137,8 @@@ break; }
+ pm_runtime_put_sync(hba->dev); + if (!desc_buff) goto out;
@@@ -162,6 -158,7 +162,7 @@@ out
/** * ufs_bsg_remove - detach and remove the added ufs-bsg node + * @hba: per adapter object * * Should be called when unloading the driver. */ diff --combined drivers/usb/storage/uas.c index 475b9c692827,fd9c0d2c111f..95bba3ba6ac6 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@@ -789,9 -789,29 +789,9 @@@ static int uas_slave_alloc(struct scsi_ { struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; - int maxp;
sdev->hostdata = devinfo;
- /* - * We have two requirements here. We must satisfy the requirements - * of the physical HC and the demands of the protocol, as we - * definitely want no additional memory allocation in this path - * ruling out using bounce buffers. - * - * For a transmission on USB to continue we must never send - * a package that is smaller than maxpacket. Hence the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. - * If the HC does not ensure that through SG, - * the upper layer must do that. We must assume nothing - * about the capabilities off the HC, so we use the most - * pessimistic requirement. - */ - - maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * The protocol has no requirements on alignment in the strict sense. * Controllers may or may not have alignment restrictions. @@@ -825,10 -845,6 +825,10 @@@ static int uas_slave_configure(struct s sdev->wce_default_on = 1; }
+ /* Some disks cannot handle READ_CAPACITY_16 */ + if (devinfo->flags & US_FL_NO_READ_CAPACITY_16) + sdev->no_read_capacity_16 = 1; + /* * Some disks return the total number of blocks in response * to READ CAPACITY rather than the highest block number. @@@ -837,12 -853,6 +837,12 @@@ if (devinfo->flags & US_FL_FIX_CAPACITY) sdev->fix_capacity = 1;
+ /* + * in some cases we have to guess + */ + if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) + sdev->guess_capacity = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device @@@ -869,7 -879,6 +869,6 @@@ static struct scsi_host_template uas_ho .eh_abort_handler = uas_eh_abort_handler, .eh_device_reset_handler = uas_eh_device_reset_handler, .this_id = -1, - .sg_tablesize = SG_NONE, .skip_settle_delay = 1, .dma_boundary = PAGE_SIZE - 1, };