diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index ac17d8669b1adad9291a6a39646b51ff80693176..1292c360390cd79d532ca155158858bd630648d5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -299,6 +299,7 @@ struct qed_hwfn {
 
 	/* Flag indicating whether interrupts are enabled or not*/
 	bool				b_int_enabled;
+	bool				b_int_requested;
 
 	struct qed_mcp_info		*mcp_info;
 
@@ -491,6 +492,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
 
+int qed_slowpath_irq_req(struct qed_hwfn *hwfn);
+
 #define QED_ETH_INTERFACE_VERSION       300
 
 #endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 803b190ccada97b30b28098666146f9d05ad1c54..817bbd5476ffb5956fca105686fc9029d06339ff 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1385,52 +1385,63 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
-static u32 qed_hw_bar_size(struct qed_dev *cdev,
-			   u8 bar_id)
+static u32 qed_hw_bar_size(struct qed_hwfn	*p_hwfn,
+			   u8			bar_id)
 {
-	u32 size = pci_resource_len(cdev->pdev, (bar_id > 0) ? 2 : 0);
+	u32 bar_reg = (bar_id == 0 ? PGLUE_B_REG_PF_BAR0_SIZE
+		       : PGLUE_B_REG_PF_BAR1_SIZE);
+	u32 val = qed_rd(p_hwfn, p_hwfn->p_main_ptt, bar_reg);
 
-	return size / cdev->num_hwfns;
+	/* Get the BAR size(in KB) from hardware given val */
+	return 1 << (val + 15);
 }
 
 int qed_hw_prepare(struct qed_dev *cdev,
 		   int personality)
 {
-	int rc, i;
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	int rc;
 
 	/* Store the precompiled init data ptrs */
 	qed_init_iro_array(cdev);
 
 	/* Initialize the first hwfn - will learn number of hwfns */
-	rc = qed_hw_prepare_single(&cdev->hwfns[0], cdev->regview,
+	rc = qed_hw_prepare_single(p_hwfn,
+				   cdev->regview,
 				   cdev->doorbells, personality);
 	if (rc)
 		return rc;
 
-	personality = cdev->hwfns[0].hw_info.personality;
+	personality = p_hwfn->hw_info.personality;
 
 	/* Initialize the rest of the hwfns */
-	for (i = 1; i < cdev->num_hwfns; i++) {
+	if (cdev->num_hwfns > 1) {
 		void __iomem *p_regview, *p_doorbell;
+		u8 __iomem *addr;
+
+		/* adjust bar offset for second engine */
+		addr = cdev->regview + qed_hw_bar_size(p_hwfn, 0) / 2;
+		p_regview = addr;
 
-		p_regview =  cdev->regview +
-			     i * qed_hw_bar_size(cdev, 0);
-		p_doorbell = cdev->doorbells +
-			     i * qed_hw_bar_size(cdev, 1);
-		rc = qed_hw_prepare_single(&cdev->hwfns[i], p_regview,
+		/* adjust doorbell bar offset for second engine */
+		addr = cdev->doorbells + qed_hw_bar_size(p_hwfn, 1) / 2;
+		p_doorbell = addr;
+
+		/* prepare second hw function */
+		rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
 					   p_doorbell, personality);
+
+		/* in case of error, need to free the previously
+		 * initiliazed hwfn 0.
+		 */
 		if (rc) {
-			/* Cleanup previously initialized hwfns */
-			while (--i >= 0) {
-				qed_init_free(&cdev->hwfns[i]);
-				qed_mcp_free(&cdev->hwfns[i]);
-				qed_hw_hwfn_free(&cdev->hwfns[i]);
-			}
-			return rc;
+			qed_init_free(p_hwfn);
+			qed_mcp_free(p_hwfn);
+			qed_hw_hwfn_free(p_hwfn);
 		}
 	}
 
-	return 0;
+	return rc;
 }
 
 void qed_hw_remove(struct qed_dev *cdev)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index de50e84902afe3b6a26c422d34a687ec9bc523ec..9cc9d62c1fec64844eba85ca10213a77d3158227 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -783,22 +783,16 @@ void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
 	qed_wr(p_hwfn, p_ptt, IGU_REG_PF_CONFIGURATION, igu_pf_conf);
 }
 
-void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt,
-			enum qed_int_mode int_mode)
+int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		       enum qed_int_mode int_mode)
 {
-	int i;
-
-	p_hwfn->b_int_enabled = 1;
+	int rc, i;
 
 	/* Mask non-link attentions */
 	for (i = 0; i < 9; i++)
 		qed_wr(p_hwfn, p_ptt,
 		       MISC_REG_AEU_ENABLE1_IGU_OUT_0 + (i << 2), 0);
 
-	/* Enable interrupt Generation */
-	qed_int_igu_enable_int(p_hwfn, p_ptt, int_mode);
-
 	/* Configure AEU signal change to produce attentions for link */
 	qed_wr(p_hwfn, p_ptt, IGU_REG_LEADING_EDGE_LATCH, 0xfff);
 	qed_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0xfff);
@@ -808,6 +802,19 @@ void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
 
 	/* Unmask AEU signals toward IGU */
 	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff);
+	if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) {
+		rc = qed_slowpath_irq_req(p_hwfn);
+		if (rc != 0) {
+			DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n");
+			return -EINVAL;
+		}
+		p_hwfn->b_int_requested = true;
+	}
+	/* Enable interrupt Generation */
+	qed_int_igu_enable_int(p_hwfn, p_ptt, int_mode);
+	p_hwfn->b_int_enabled = 1;
+
+	return rc;
 }
 
 void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
@@ -1127,3 +1134,11 @@ int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
 
 	return info->igu_sb_cnt;
 }
+
+void qed_int_disable_post_isr_release(struct qed_dev *cdev)
+{
+	int i;
+
+	for_each_hwfn(cdev, i)
+		cdev->hwfns[i].b_int_requested = false;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 16b57518e706728dde646688fb059a3f8f88bb97..51e0b09a7f47d3da8ca9c1d2d0982aa2e77d51f4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -169,10 +169,14 @@ int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
 			int *p_iov_blks);
 
 /**
- * @file
+ * @brief qed_int_disable_post_isr_release - performs the cleanup post ISR
+ *        release. The API need to be called after releasing all slowpath IRQs
+ *        of the device.
+ *
+ * @param cdev
  *
- * @brief Interrupt handler
  */
+void qed_int_disable_post_isr_release(struct qed_dev *cdev);
 
 #define QED_CAU_DEF_RX_TIMER_RES 0
 #define QED_CAU_DEF_TX_TIMER_RES 0
@@ -366,10 +370,11 @@ void qed_int_setup(struct qed_hwfn *p_hwfn,
  * @param p_hwfn
  * @param p_ptt
  * @param int_mode
+ *
+ * @return int
  */
-void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt,
-			enum qed_int_mode int_mode);
+int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		       enum qed_int_mode int_mode);
 
 /**
  * @brief - Initialize CAU status block entry
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 947c7af72b25b32db163dc3ea47ca7d63db50490..174f7341c5c32c2bbcf81391b02f6d9a6d9327c2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -476,41 +476,22 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance)
 	return rc;
 }
 
-static int qed_slowpath_irq_req(struct qed_dev *cdev)
+int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
 {
-	int i = 0, rc = 0;
+	struct qed_dev *cdev = hwfn->cdev;
+	int rc = 0;
+	u8 id;
 
 	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
-		/* Request all the slowpath MSI-X vectors */
-		for (i = 0; i < cdev->num_hwfns; i++) {
-			snprintf(cdev->hwfns[i].name, NAME_SIZE,
-				 "sp-%d-%02x:%02x.%02x",
-				 i, cdev->pdev->bus->number,
-				 PCI_SLOT(cdev->pdev->devfn),
-				 cdev->hwfns[i].abs_pf_id);
-
-			rc = request_irq(cdev->int_params.msix_table[i].vector,
-					 qed_msix_sp_int, 0,
-					 cdev->hwfns[i].name,
-					 cdev->hwfns[i].sp_dpc);
-			if (rc)
-				break;
-
-			DP_VERBOSE(&cdev->hwfns[i],
-				   (NETIF_MSG_INTR | QED_MSG_SP),
+		id = hwfn->my_id;
+		snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x",
+			 id, cdev->pdev->bus->number,
+			 PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id);
+		rc = request_irq(cdev->int_params.msix_table[id].vector,
+				 qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc);
+		if (!rc)
+			DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
 				   "Requested slowpath MSI-X\n");
-		}
-
-		if (i != cdev->num_hwfns) {
-			/* Free already request MSI-X vectors */
-			for (i--; i >= 0; i--) {
-				unsigned int vec =
-					cdev->int_params.msix_table[i].vector;
-				synchronize_irq(vec);
-				free_irq(cdev->int_params.msix_table[i].vector,
-					 cdev->hwfns[i].sp_dpc);
-			}
-		}
 	} else {
 		unsigned long flags = 0;
 
@@ -534,13 +515,17 @@ static void qed_slowpath_irq_free(struct qed_dev *cdev)
 
 	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
 		for_each_hwfn(cdev, i) {
+			if (!cdev->hwfns[i].b_int_requested)
+				break;
 			synchronize_irq(cdev->int_params.msix_table[i].vector);
 			free_irq(cdev->int_params.msix_table[i].vector,
 				 cdev->hwfns[i].sp_dpc);
 		}
 	} else {
-		free_irq(cdev->pdev->irq, cdev);
+		if (QED_LEADING_HWFN(cdev)->b_int_requested)
+			free_irq(cdev->pdev->irq, cdev);
 	}
+	qed_int_disable_post_isr_release(cdev);
 }
 
 static int qed_nic_stop(struct qed_dev *cdev)
@@ -765,16 +750,11 @@ static int qed_slowpath_start(struct qed_dev *cdev,
 	if (rc)
 		goto err1;
 
-	/* Request the slowpath IRQ */
-	rc = qed_slowpath_irq_req(cdev);
-	if (rc)
-		goto err2;
-
 	/* Allocate stream for unzipping */
 	rc = qed_alloc_stream_mem(cdev);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to allocate stream memory\n");
-		goto err3;
+		goto err2;
 	}
 
 	/* Start the slowpath */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 7a5ce5914ace2287ec73afa20ed2d9d5eb328108..e8df12335a972cd3092cbefa8b675408446f06c9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -363,4 +363,8 @@
 		0x7 << 0)
 #define  MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT \
 	0
+#define PGLUE_B_REG_PF_BAR0_SIZE \
+	0x2aae60UL
+#define PGLUE_B_REG_PF_BAR1_SIZE \
+	0x2aae64UL
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 31a1f1eb4f56a5d00f0ce3ae75a29af56a96ef12..287fadfab52d754e0a913d4ed044ca7e50cf1a09 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -124,8 +124,12 @@ struct qed_spq {
 	dma_addr_t		p_phys;
 	struct qed_spq_entry	*p_virt;
 
-	/* Used as index for completions (returns on EQ by FW) */
-	u16			echo_idx;
+#define SPQ_RING_SIZE \
+	(CORE_SPQE_PAGE_SIZE_BYTES / sizeof(struct slow_path_element))
+
+	/* Bitmap for handling out-of-order completions */
+	DECLARE_BITMAP(p_comp_bitmap, SPQ_RING_SIZE);
+	u8			comp_bitmap_idx;
 
 	/* Statistics */
 	u32			unlimited_pending_count;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 7c0b8459666ed270ec754a87c9fa1c072d16dd49..3dd548ab8df14aabf9fa6a551cf54aa97f704d12 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -112,8 +112,6 @@ static int
 qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
 		   struct qed_spq_entry *p_ent)
 {
-	p_ent->elem.hdr.echo = 0;
-	p_hwfn->p_spq->echo_idx++;
 	p_ent->flags = 0;
 
 	switch (p_ent->comp_mode) {
@@ -195,10 +193,12 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
 			   struct qed_spq *p_spq,
 			   struct qed_spq_entry *p_ent)
 {
-	struct qed_chain		*p_chain = &p_hwfn->p_spq->chain;
+	struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
+	u16 echo = qed_chain_get_prod_idx(p_chain);
 	struct slow_path_element	*elem;
 	struct core_db_data		db;
 
+	p_ent->elem.hdr.echo	= cpu_to_le16(echo);
 	elem = qed_chain_produce(p_chain);
 	if (!elem) {
 		DP_NOTICE(p_hwfn, "Failed to produce from SPQ chain\n");
@@ -437,7 +437,9 @@ void qed_spq_setup(struct qed_hwfn *p_hwfn)
 	p_spq->comp_count		= 0;
 	p_spq->comp_sent_count		= 0;
 	p_spq->unlimited_pending_count	= 0;
-	p_spq->echo_idx			= 0;
+
+	bitmap_zero(p_spq->p_comp_bitmap, SPQ_RING_SIZE);
+	p_spq->comp_bitmap_idx = 0;
 
 	/* SPQ cid, cannot fail */
 	qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_spq->cid);
@@ -582,26 +584,32 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 
 	if (p_ent->queue == &p_spq->unlimited_pending) {
-		struct qed_spq_entry *p_en2;
 
 		if (list_empty(&p_spq->free_pool)) {
 			list_add_tail(&p_ent->list, &p_spq->unlimited_pending);
 			p_spq->unlimited_pending_count++;
 
 			return 0;
-		}
+		} else {
+			struct qed_spq_entry *p_en2;
 
-		p_en2 = list_first_entry(&p_spq->free_pool,
-					 struct qed_spq_entry,
-					 list);
-		list_del(&p_en2->list);
+			p_en2 = list_first_entry(&p_spq->free_pool,
+						 struct qed_spq_entry,
+						 list);
+			list_del(&p_en2->list);
+
+			/* Copy the ring element physical pointer to the new
+			 * entry, since we are about to override the entire ring
+			 * entry and don't want to lose the pointer.
+			 */
+			p_ent->elem.data_ptr = p_en2->elem.data_ptr;
 
-		/* Strcut assignment */
-		*p_en2 = *p_ent;
+			*p_en2 = *p_ent;
 
-		kfree(p_ent);
+			kfree(p_ent);
 
-		p_ent = p_en2;
+			p_ent = p_en2;
+		}
 	}
 
 	/* entry is to be placed in 'pending' queue */
@@ -777,13 +785,38 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 	list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending,
 				 list) {
 		if (p_ent->elem.hdr.echo == echo) {
+			u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
+
 			list_del(&p_ent->list);
 
-			qed_chain_return_produced(&p_spq->chain);
+			/* Avoid overriding of SPQ entries when getting
+			 * out-of-order completions, by marking the completions
+			 * in a bitmap and increasing the chain consumer only
+			 * for the first successive completed entries.
+			 */
+			bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
+
+			while (test_bit(p_spq->comp_bitmap_idx,
+					p_spq->p_comp_bitmap)) {
+				bitmap_clear(p_spq->p_comp_bitmap,
+					     p_spq->comp_bitmap_idx,
+					     SPQ_RING_SIZE);
+				p_spq->comp_bitmap_idx++;
+				qed_chain_return_produced(&p_spq->chain);
+			}
+
 			p_spq->comp_count++;
 			found = p_ent;
 			break;
 		}
+
+		/* This is relatively uncommon - depends on scenarios
+		 * which have mutliple per-PF sent ramrods.
+		 */
+		DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+			   "Got completion for echo %04x - doesn't match echo %04x in completion pending list\n",
+			   le16_to_cpu(echo),
+			   le16_to_cpu(p_ent->elem.hdr.echo));
 	}
 
 	/* Release lock before callback, as callback may post
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 6a4347639c0329235a735d723a564acecf59f022..1d1ba2c5ee7a26f6a35b1835c016a6bd7755fcd8 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -9,6 +9,8 @@
 #ifndef __COMMON_HSI__
 #define __COMMON_HSI__
 
+#define CORE_SPQE_PAGE_SIZE_BYTES                       4096
+
 #define FW_MAJOR_VERSION	8
 #define FW_MINOR_VERSION	4
 #define FW_REVISION_VERSION	2
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index b920c3605c462ef0bdb9c8ba0c8cd9d8957addb7..41b9049b57e2475575d076b4f0728234618ce07e 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -111,7 +111,8 @@ static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain)
 	used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) -
 		(u32)p_chain->cons_idx;
 	if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR)
-		used -= (used / p_chain->elem_per_page);
+		used -= p_chain->prod_idx / p_chain->elem_per_page -
+			p_chain->cons_idx / p_chain->elem_per_page;
 
 	return p_chain->capacity - used;
 }