diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6d8352c0f3547a9f63fa7db614e1466f2b822616..f7aca562f267859da09aa075dfaf53d072741684 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -34,6 +34,7 @@ available subsections can be seen below.
    edac
    scsi
    libata
+   target
    mtdnand
    miscellaneous
    w1
diff --git a/Documentation/driver-api/scsi.rst b/Documentation/driver-api/scsi.rst
index 31ad0fed6763bcdac1df622ae8b66433c6e2a5a5..64b231d125e0fc0f5396e18022898086b2d5be40 100644
--- a/Documentation/driver-api/scsi.rst
+++ b/Documentation/driver-api/scsi.rst
@@ -334,5 +334,5 @@ todo
 ~~~~
 
 Parallel (fast/wide/ultra) SCSI, USB, SATA, SAS, Fibre Channel,
-FireWire, ATAPI devices, Infiniband, I2O, iSCSI, Parallel ports,
+FireWire, ATAPI devices, Infiniband, I2O, Parallel ports,
 netlink...
diff --git a/Documentation/driver-api/target.rst b/Documentation/driver-api/target.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4363611dd86d1dba4696c2574d2b83cdf8419321
--- /dev/null
+++ b/Documentation/driver-api/target.rst
@@ -0,0 +1,64 @@
+=================================
+Target and iSCSI Interfaces Guide
+=================================
+
+Introduction and Overview
+=========================
+
+TBD
+
+Target core device interfaces
+=============================
+
+.. kernel-doc:: drivers/target/target_core_device.c
+    :export:
+
+Target core transport interfaces
+================================
+
+.. kernel-doc:: drivers/target/target_core_transport.c
+    :export:
+
+Target-supported userspace I/O
+==============================
+
+.. kernel-doc:: drivers/target/target_core_user.c
+    :doc: Userspace I/O
+
+.. kernel-doc:: include/uapi/linux/target_core_user.h
+    :doc: Ring Design
+
+iSCSI helper functions
+======================
+
+.. kernel-doc:: drivers/scsi/libiscsi.c
+   :export:
+
+
+iSCSI boot information
+======================
+
+.. kernel-doc:: drivers/scsi/iscsi_boot_sysfs.c
+   :export:
+
+
+iSCSI transport class
+=====================
+
+The file drivers/scsi/scsi_transport_iscsi.c defines transport
+attributes for the iSCSI class, which sends SCSI packets over TCP/IP
+connections.
+
+.. kernel-doc:: drivers/scsi/scsi_transport_iscsi.c
+   :export:
+
+
+iSCSI TCP interfaces
+====================
+
+.. kernel-doc:: drivers/scsi/iscsi_tcp.c
+   :internal:
+
+.. kernel-doc:: drivers/scsi/libiscsi_tcp.c
+   :export:
+
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index f31145d6d4722df71a77f9b807e3e13de8de6884..eb30e558fc360dd2fbe257589b4641828313b0a5 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -190,7 +190,7 @@ $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
 $(obj)/scsi_sysfs.o: $(obj)/scsi_devinfo_tbl.c
 
 quiet_cmd_bflags = GEN     $@
-	cmd_bflags = sed -n 's/.*BLIST_\([A-Z0-9_]*\) *.*/BLIST_FLAG_NAME(\1),/p' $< > $@
+	cmd_bflags = sed -n 's/.*define *BLIST_\([A-Z0-9_]*\) *.*/BLIST_FLAG_NAME(\1),/p' $< > $@
 
 $(obj)/scsi_devinfo_tbl.c: include/scsi/scsi_devinfo.h
 	$(call if_changed,bflags)
diff --git a/drivers/scsi/esas2r/esas2r_init.c b/drivers/scsi/esas2r/esas2r_init.c
index 9dffcb28c9b74d0cf0ee89abce9cf3a39f3eefd6..9db645dde35ec355071219362d301f05ef322543 100644
--- a/drivers/scsi/esas2r/esas2r_init.c
+++ b/drivers/scsi/esas2r/esas2r_init.c
@@ -1202,8 +1202,6 @@ static bool esas2r_format_init_msg(struct esas2r_adapter *a,
 	case ESAS2R_INIT_MSG_START:
 	case ESAS2R_INIT_MSG_REINIT:
 	{
-		struct timeval now;
-		do_gettimeofday(&now);
 		esas2r_hdebug("CFG init");
 		esas2r_build_cfg_req(a,
 				     rq,
@@ -1212,7 +1210,8 @@ static bool esas2r_format_init_msg(struct esas2r_adapter *a,
 				     NULL);
 		ci = (struct atto_vda_cfg_init *)&rq->vrq->cfg.data.init;
 		ci->sgl_page_size = cpu_to_le32(sgl_page_size);
-		ci->epoch_time = cpu_to_le32(now.tv_sec);
+		/* firmware interface overflows in y2106 */
+		ci->epoch_time = cpu_to_le32(ktime_get_real_seconds());
 		rq->flags |= RF_FAILURE_OK;
 		a->init_msg = ESAS2R_INIT_MSG_INIT;
 		break;
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index e3c8857741a13aa2ff3d84d1f5812b28ca46c85a..bd6ac6b5980a1128af2b0d4234e51f9a97c52a5b 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -291,7 +291,7 @@ static void ips_freescb(ips_ha_t *, ips_scb_t *);
 static void ips_setup_funclist(ips_ha_t *);
 static void ips_statinit(ips_ha_t *);
 static void ips_statinit_memio(ips_ha_t *);
-static void ips_fix_ffdc_time(ips_ha_t *, ips_scb_t *, time_t);
+static void ips_fix_ffdc_time(ips_ha_t *, ips_scb_t *, time64_t);
 static void ips_ffdc_reset(ips_ha_t *, int);
 static void ips_ffdc_time(ips_ha_t *);
 static uint32_t ips_statupd_copperhead(ips_ha_t *);
@@ -985,10 +985,7 @@ static int __ips_eh_reset(struct scsi_cmnd *SC)
 
 	/* FFDC */
 	if (le32_to_cpu(ha->subsys->param[3]) & 0x300000) {
-		struct timeval tv;
-
-		do_gettimeofday(&tv);
-		ha->last_ffdc = tv.tv_sec;
+		ha->last_ffdc = ktime_get_real_seconds();
 		ha->reset_count++;
 		ips_ffdc_reset(ha, IPS_INTR_IORL);
 	}
@@ -2392,7 +2389,6 @@ static int
 ips_hainit(ips_ha_t * ha)
 {
 	int i;
-	struct timeval tv;
 
 	METHOD_TRACE("ips_hainit", 1);
 
@@ -2407,8 +2403,7 @@ ips_hainit(ips_ha_t * ha)
 
 	/* Send FFDC */
 	ha->reset_count = 1;
-	do_gettimeofday(&tv);
-	ha->last_ffdc = tv.tv_sec;
+	ha->last_ffdc = ktime_get_real_seconds();
 	ips_ffdc_reset(ha, IPS_INTR_IORL);
 
 	if (!ips_read_config(ha, IPS_INTR_IORL)) {
@@ -2548,12 +2543,9 @@ ips_next(ips_ha_t * ha, int intr)
 
 	if ((ha->subsys->param[3] & 0x300000)
 	    && (ha->scb_activelist.count == 0)) {
-		struct timeval tv;
-
-		do_gettimeofday(&tv);
-
-		if (tv.tv_sec - ha->last_ffdc > IPS_SECS_8HOURS) {
-			ha->last_ffdc = tv.tv_sec;
+		time64_t now = ktime_get_real_seconds();
+		if (now - ha->last_ffdc > IPS_SECS_8HOURS) {
+			ha->last_ffdc = now;
 			ips_ffdc_time(ha);
 		}
 	}
@@ -5988,59 +5980,21 @@ ips_ffdc_time(ips_ha_t * ha)
 /*                                                                          */
 /****************************************************************************/
 static void
-ips_fix_ffdc_time(ips_ha_t * ha, ips_scb_t * scb, time_t current_time)
+ips_fix_ffdc_time(ips_ha_t * ha, ips_scb_t * scb, time64_t current_time)
 {
-	long days;
-	long rem;
-	int i;
-	int year;
-	int yleap;
-	int year_lengths[2] = { IPS_DAYS_NORMAL_YEAR, IPS_DAYS_LEAP_YEAR };
-	int month_lengths[12][2] = { {31, 31},
-	{28, 29},
-	{31, 31},
-	{30, 30},
-	{31, 31},
-	{30, 30},
-	{31, 31},
-	{31, 31},
-	{30, 30},
-	{31, 31},
-	{30, 30},
-	{31, 31}
-	};
+	struct tm tm;
 
 	METHOD_TRACE("ips_fix_ffdc_time", 1);
 
-	days = current_time / IPS_SECS_DAY;
-	rem = current_time % IPS_SECS_DAY;
-
-	scb->cmd.ffdc.hour = (rem / IPS_SECS_HOUR);
-	rem = rem % IPS_SECS_HOUR;
-	scb->cmd.ffdc.minute = (rem / IPS_SECS_MIN);
-	scb->cmd.ffdc.second = (rem % IPS_SECS_MIN);
-
-	year = IPS_EPOCH_YEAR;
-	while (days < 0 || days >= year_lengths[yleap = IPS_IS_LEAP_YEAR(year)]) {
-		int newy;
-
-		newy = year + (days / IPS_DAYS_NORMAL_YEAR);
-		if (days < 0)
-			--newy;
-		days -= (newy - year) * IPS_DAYS_NORMAL_YEAR +
-		    IPS_NUM_LEAP_YEARS_THROUGH(newy - 1) -
-		    IPS_NUM_LEAP_YEARS_THROUGH(year - 1);
-		year = newy;
-	}
-
-	scb->cmd.ffdc.yearH = year / 100;
-	scb->cmd.ffdc.yearL = year % 100;
-
-	for (i = 0; days >= month_lengths[i][yleap]; ++i)
-		days -= month_lengths[i][yleap];
+	time64_to_tm(current_time, 0, &tm);
 
-	scb->cmd.ffdc.month = i + 1;
-	scb->cmd.ffdc.day = days + 1;
+	scb->cmd.ffdc.hour   = tm.tm_hour;
+	scb->cmd.ffdc.minute = tm.tm_min;
+	scb->cmd.ffdc.second = tm.tm_sec;
+	scb->cmd.ffdc.yearH  = (tm.tm_year + 1900) / 100;
+	scb->cmd.ffdc.yearL  = tm.tm_year % 100;
+	scb->cmd.ffdc.month  = tm.tm_mon + 1;
+	scb->cmd.ffdc.day    = tm.tm_mday;
 }
 
 /****************************************************************************
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index 366be3b2f9b49a8d42b95eb5b612f912e329dba4..db546171e97fbdfc37f65d9f530a51ca0b18dab6 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -402,16 +402,7 @@
    #define IPS_BIOS_HEADER             0xC0
 
    /* time oriented stuff */
-   #define IPS_IS_LEAP_YEAR(y)           (((y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))) ? 1 : 0)
-   #define IPS_NUM_LEAP_YEARS_THROUGH(y) ((y) / 4 - (y) / 100 + (y) / 400)
-
-   #define IPS_SECS_MIN                 60
-   #define IPS_SECS_HOUR                3600
    #define IPS_SECS_8HOURS              28800
-   #define IPS_SECS_DAY                 86400
-   #define IPS_DAYS_NORMAL_YEAR         365
-   #define IPS_DAYS_LEAP_YEAR           366
-   #define IPS_EPOCH_YEAR               1970
 
    /*
     * Scsi_Host Template
@@ -1054,7 +1045,7 @@ typedef struct ips_ha {
    uint8_t            active;
    int                ioctl_reset;        /* IOCTL Requested Reset Flag */
    uint16_t           reset_count;        /* number of resets           */
-   time_t             last_ffdc;          /* last time we sent ffdc info*/
+   time64_t           last_ffdc;          /* last time we sent ffdc info*/
    uint8_t            slot_num;           /* PCI Slot Number            */
    int                ioctl_len;          /* size of ioctl buffer       */
    dma_addr_t         ioctl_busaddr;      /* dma address of ioctl buffer*/
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index fe97401ad1927b9a0edc7de7afbdbe2007d80a4f..2e6fd864723b185cd0f540512c33d0f34af19013 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -2693,22 +2693,4 @@ static struct pci_driver mvumi_pci_driver = {
 #endif
 };
 
-/**
- * mvumi_init - Driver load entry point
- */
-static int __init mvumi_init(void)
-{
-	return pci_register_driver(&mvumi_pci_driver);
-}
-
-/**
- * mvumi_exit - Driver unload entry point
- */
-static void __exit mvumi_exit(void)
-{
-
-	pci_unregister_driver(&mvumi_pci_driver);
-}
-
-module_init(mvumi_init);
-module_exit(mvumi_exit);
+module_pci_driver(mvumi_pci_driver);
diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c
index b784002ef0bd00369c10f79aced81290a2366a3c..c5a8756384bcf1b120a54b0d1a1bc391137bc964 100644
--- a/drivers/scsi/scsi_debugfs.c
+++ b/drivers/scsi/scsi_debugfs.c
@@ -4,7 +4,7 @@
 #include <scsi/scsi_dbg.h>
 #include "scsi_debugfs.h"
 
-#define SCSI_CMD_FLAG_NAME(name) [ilog2(SCMD_##name)] = #name
+#define SCSI_CMD_FLAG_NAME(name)[const_ilog2(SCMD_##name)] = #name
 static const char *const scsi_cmd_flags[] = {
 	SCSI_CMD_FLAG_NAME(TAGGED),
 	SCSI_CMD_FLAG_NAME(UNCHECKED_ISA_DMA),
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index e5c82f13b7d9d8e03228571752d8c5c04509a447..c4cbfd07b9167f0e29b635b9b24e65a6df3826d9 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -161,12 +161,14 @@ static struct {
 	{"DGC", "RAID", NULL, BLIST_SPARSELUN},	/* EMC CLARiiON, storage on LUN 0 */
 	{"DGC", "DISK", NULL, BLIST_SPARSELUN},	/* EMC CLARiiON, no storage on LUN 0 */
 	{"EMC",  "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
-	{"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2},
+	{"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN |
+	 BLIST_REPORTLUN2 | BLIST_RETRY_ITF},
 	{"EMULEX", "MD21/S2     ESDI", NULL, BLIST_SINGLELUN},
 	{"easyRAID", "16P", NULL, BLIST_NOREPORTLUN},
 	{"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN},
 	{"easyRAID", "F8", NULL, BLIST_NOREPORTLUN},
 	{"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+	{"FUJITSU", "ETERNUS_DXM", "*", BLIST_RETRY_ASC_C1},
 	{"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
 	{"Generic", "USB Storage-SMC", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36}, /* FW: 0180 and 0207 */
 	{"HITACHI", "DF400", "*", BLIST_REPORTLUN2},
@@ -360,8 +362,22 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
 	scsi_strcpy_devinfo("model", devinfo->model, sizeof(devinfo->model),
 			    model, compatible);
 
-	if (strflags)
-		flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+	if (strflags) {
+		unsigned long long val;
+		int ret = kstrtoull(strflags, 0, &val);
+
+		if (ret != 0) {
+			kfree(devinfo);
+			return ret;
+		}
+		flags = (__force blist_flags_t)val;
+	}
+	if (flags & __BLIST_UNUSED_MASK) {	/* reject undefined flag bits */
+		pr_err("scsi_devinfo (%s:%s): unsupported flags 0x%llx\n",
+		       vendor, model, flags & __BLIST_UNUSED_MASK);
+		kfree(devinfo);
+		return -EINVAL;
+	}
 	devinfo->flags = flags;
 	devinfo->compatible = compatible;
 
@@ -614,7 +630,7 @@ static int devinfo_seq_show(struct seq_file *m, void *v)
 	    devinfo_table->name)
 		seq_printf(m, "[%s]:\n", devinfo_table->name);
 
-	seq_printf(m, "'%.8s' '%.16s' 0x%x\n",
+	seq_printf(m, "'%.8s' '%.16s' 0x%llx\n",
 		   devinfo->vendor, devinfo->model, devinfo->flags);
 	return 0;
 }
@@ -733,9 +749,9 @@ MODULE_PARM_DESC(dev_flags,
 	 " list entries for vendor and model with an integer value of flags"
 	 " to the scsi device info list");
 
-module_param_named(default_dev_flags, scsi_default_dev_flags, int, S_IRUGO|S_IWUSR);
+module_param_named(default_dev_flags, scsi_default_dev_flags, ullong, 0644);
 MODULE_PARM_DESC(default_dev_flags,
-		 "scsi default device flag integer value");
+		 "scsi default device flag uint64_t value");
 
 /**
  * scsi_exit_devinfo - remove /proc/scsi/device_info & the scsi_dev_info_list
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 946039117bf4f2c655db2bb1129ee75df1f1df21..94d2047e0096aee29eb3238aa69982a1306ef2dc 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -38,6 +38,7 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi_dh.h>
+#include <scsi/scsi_devinfo.h>
 #include <scsi/sg.h>
 
 #include "scsi_priv.h"
@@ -525,6 +526,12 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
 		if (sshdr.asc == 0x10) /* DIF */
 			return SUCCESS;
 
+		if (sshdr.asc == 0x44 && sdev->sdev_bflags & BLIST_RETRY_ITF)
+			return ADD_TO_MLQUEUE;
+		if (sshdr.asc == 0xc1 && sshdr.ascq == 0x01 &&
+		    sdev->sdev_bflags & BLIST_RETRY_ASC_C1)
+			return ADD_TO_MLQUEUE;
+
 		return NEEDS_RETRY;
 	case NOT_READY:
 	case UNIT_ATTENTION:
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 1e36c9a9ad17df678529e13bf0dfc0f888a3c35e..7943b762c12de7609d28168316e57b3ac15c91be 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -968,7 +968,7 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
 #define BLIST_FLAG_NAME(name)					\
-	[ilog2((__force unsigned int)BLIST_##name)] = #name
+	[const_ilog2((__force __u64)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 6c399480783d3051aac5ad65e9b4865ed99eca41..e64489a4a9a610e76109e4b4fa8fa6a3f6c86a51 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3878,7 +3878,7 @@ static struct st_buffer *new_tape_buffer(int need_dma, int max_sg)
 {
 	struct st_buffer *tb;
 
-	tb = kzalloc(sizeof(struct st_buffer), GFP_ATOMIC);
+	tb = kzalloc(sizeof(struct st_buffer), GFP_KERNEL);
 	if (!tb) {
 		printk(KERN_NOTICE "st: Can't allocate new tape buffer.\n");
 		return NULL;
@@ -3889,7 +3889,7 @@ static struct st_buffer *new_tape_buffer(int need_dma, int max_sg)
 	tb->buffer_size = 0;
 
 	tb->reserved_pages = kzalloc(max_sg * sizeof(struct page *),
-				     GFP_ATOMIC);
+				     GFP_KERNEL);
 	if (!tb->reserved_pages) {
 		kfree(tb);
 		return NULL;
@@ -4290,7 +4290,7 @@ static int st_probe(struct device *dev)
 		goto out_buffer_free;
 	}
 
-	tpnt = kzalloc(sizeof(struct scsi_tape), GFP_ATOMIC);
+	tpnt = kzalloc(sizeof(struct scsi_tape), GFP_KERNEL);
 	if (tpnt == NULL) {
 		sdev_printk(KERN_ERR, SDp,
 			    "st: Can't allocate device descriptor.\n");
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 882e4b8c1ff9197e4ab47d7cdd3b1d4f93e05733..7ba99210928138a06e11b49ed274a77b6a1de264 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+		"Select a channel if available ring size > this in percent");
+
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
@@ -468,6 +474,13 @@ struct storvsc_device {
 	 * Mask of CPUs bound to subchannels.
 	 */
 	struct cpumask alloced_cpus;
+	/*
+	 * Pre-allocated struct cpumask for each hardware queue.
+	 * struct cpumask is used when selecting outgoing channels. It is a
+	 * big structure, defaulting to 1024 bytes when CONFIG_MAXSMP=y.
+	 * Pre-allocate it to avoid allocation on the kernel stack.
+	 */
+	struct cpumask *cpumask_chns;
 	/* Used for vsc/vsp channel reset process */
 	struct storvsc_cmd_request init_request;
 	struct storvsc_cmd_request reset_request;
@@ -872,6 +885,13 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
 	if (stor_device->stor_chns == NULL)
 		return -ENOMEM;
 
+	stor_device->cpumask_chns = kcalloc(num_possible_cpus(),
+			sizeof(struct cpumask), GFP_KERNEL);
+	if (stor_device->cpumask_chns == NULL) {
+		kfree(stor_device->stor_chns);
+		return -ENOMEM;
+	}
+
 	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
 	cpumask_set_cpu(device->channel->target_cpu,
 			&stor_device->alloced_cpus);
@@ -1232,6 +1252,7 @@ static int storvsc_dev_remove(struct hv_device *device)
 	vmbus_close(device->channel);
 
 	kfree(stor_device->stor_chns);
+	kfree(stor_device->cpumask_chns);
 	kfree(stor_device);
 	return 0;
 }
@@ -1241,7 +1262,7 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 {
 	u16 slot = 0;
 	u16 hash_qnum;
-	struct cpumask alloced_mask;
+	struct cpumask *alloced_mask = &stor_device->cpumask_chns[q_num];
 	int num_channels, tgt_cpu;
 
 	if (stor_device->num_sc == 0)
@@ -1257,10 +1278,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 	 * III. Mapping is persistent.
 	 */
 
-	cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+	cpumask_and(alloced_mask, &stor_device->alloced_cpus,
 		    cpumask_of_node(cpu_to_node(q_num)));
 
-	num_channels = cpumask_weight(&alloced_mask);
+	num_channels = cpumask_weight(alloced_mask);
 	if (num_channels == 0)
 		return stor_device->device->channel;
 
@@ -1268,7 +1289,7 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 	while (hash_qnum >= num_channels)
 		hash_qnum -= num_channels;
 
-	for_each_cpu(tgt_cpu, &alloced_mask) {
+	for_each_cpu(tgt_cpu, alloced_mask) {
 		if (slot == hash_qnum)
 			break;
 		slot++;
@@ -1285,9 +1306,9 @@ static int storvsc_do_io(struct hv_device *device,
 {
 	struct storvsc_device *stor_device;
 	struct vstor_packet *vstor_packet;
-	struct vmbus_channel *outgoing_channel;
+	struct vmbus_channel *outgoing_channel, *channel;
 	int ret = 0;
-	struct cpumask alloced_mask;
+	struct cpumask *alloced_mask;
 	int tgt_cpu;
 
 	vstor_packet = &request->vstor_packet;
@@ -1301,22 +1322,53 @@ static int storvsc_do_io(struct hv_device *device,
 	/*
 	 * Select an an appropriate channel to send the request out.
 	 */
-
 	if (stor_device->stor_chns[q_num] != NULL) {
 		outgoing_channel = stor_device->stor_chns[q_num];
-		if (outgoing_channel->target_cpu == smp_processor_id()) {
+		if (outgoing_channel->target_cpu == q_num) {
 			/*
 			 * Ideally, we want to pick a different channel if
 			 * available on the same NUMA node.
 			 */
-			cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+			alloced_mask = &stor_device->cpumask_chns[q_num];
+			cpumask_and(alloced_mask, &stor_device->alloced_cpus,
 				    cpumask_of_node(cpu_to_node(q_num)));
-			for_each_cpu_wrap(tgt_cpu, &alloced_mask,
-					outgoing_channel->target_cpu + 1) {
-				if (tgt_cpu != outgoing_channel->target_cpu) {
-					outgoing_channel =
-					stor_device->stor_chns[tgt_cpu];
-					break;
+
+			for_each_cpu_wrap(tgt_cpu, alloced_mask, q_num + 1) {
+				if (tgt_cpu == q_num)
+					continue;
+				channel = stor_device->stor_chns[tgt_cpu];
+				if (hv_get_avail_to_write_percent(
+							&channel->outbound)
+						> ring_avail_percent_lowater) {
+					outgoing_channel = channel;
+					goto found_channel;
+				}
+			}
+
+			/*
+			 * All the other channels on the same NUMA node are
+			 * busy. Try to use the channel on the current CPU.
+			 */
+			if (hv_get_avail_to_write_percent(
+						&outgoing_channel->outbound)
+					> ring_avail_percent_lowater)
+				goto found_channel;
+
+			/*
+			 * If we reach here, all the channels on the current
+			 * NUMA node are busy. Try to find a channel in
+			 * other NUMA nodes.
+			 */
+			cpumask_andnot(alloced_mask, &stor_device->alloced_cpus,
+					cpumask_of_node(cpu_to_node(q_num)));
+
+			for_each_cpu(tgt_cpu, alloced_mask) {
+				channel = stor_device->stor_chns[tgt_cpu];
+				if (hv_get_avail_to_write_percent(
+							&channel->outbound)
+						> ring_avail_percent_lowater) {
+					outgoing_channel = channel;
+					goto found_channel;
 				}
 			}
 		}
@@ -1324,7 +1376,7 @@ static int storvsc_do_io(struct hv_device *device,
 		outgoing_channel = get_og_chn(stor_device, q_num);
 	}
 
-
+found_channel:
 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
 
 	vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) -
@@ -1729,8 +1781,9 @@ static int storvsc_probe(struct hv_device *device,
 				(num_cpus - 1) / storvsc_vcpus_per_sub_channel;
 	}
 
-	scsi_driver.can_queue = (max_outstanding_req_per_channel *
-				 (max_sub_channels + 1));
+	scsi_driver.can_queue = max_outstanding_req_per_channel *
+				(max_sub_channels + 1) *
+				(100 - ring_avail_percent_lowater) / 100;
 
 	host = scsi_host_alloc(&scsi_driver,
 			       sizeof(struct hv_host_device));
@@ -1861,6 +1914,7 @@ static int storvsc_probe(struct hv_device *device,
 
 err_out1:
 	kfree(stor_device->stor_chns);
+	kfree(stor_device->cpumask_chns);
 	kfree(stor_device);
 
 err_out0:
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 4558f2e1fe1bb5ae3b4907f6ba4b86cb7f1d2e3a..3500aa5927f23a2bdcdf798a3524682cd9e0cab3 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1431,7 +1431,7 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
 	return 0;
 }
 
-/*
+/**
  * target_submit_cmd_map_sgls - lookup unpacked lun and submit uninitialized
  * 			 se_cmd + use pre-allocated SGL memory.
  *
@@ -1441,7 +1441,7 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
  * @sense: pointer to SCSI sense buffer
  * @unpacked_lun: unpacked LUN to reference for struct se_lun
  * @data_length: fabric expected data transfer length
- * @task_addr: SAM task attribute
+ * @task_attr: SAM task attribute
  * @data_dir: DMA data direction
  * @flags: flags for command submission from target_sc_flags_tables
  * @sgl: struct scatterlist memory for unidirectional mapping
@@ -1578,7 +1578,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess
 }
 EXPORT_SYMBOL(target_submit_cmd_map_sgls);
 
-/*
+/**
  * target_submit_cmd - lookup unpacked lun and submit uninitialized se_cmd
  *
  * @se_cmd: command descriptor to submit
@@ -1587,7 +1587,7 @@ EXPORT_SYMBOL(target_submit_cmd_map_sgls);
  * @sense: pointer to SCSI sense buffer
  * @unpacked_lun: unpacked LUN to reference for struct se_lun
  * @data_length: fabric expected data transfer length
- * @task_addr: SAM task attribute
+ * @task_attr: SAM task attribute
  * @data_dir: DMA data direction
  * @flags: flags for command submission from target_sc_flags_tables
  *
@@ -1654,7 +1654,7 @@ static bool target_lookup_lun_from_tag(struct se_session *se_sess, u64 tag,
  * @se_sess: associated se_sess for endpoint
  * @sense: pointer to SCSI sense buffer
  * @unpacked_lun: unpacked LUN to reference for struct se_lun
- * @fabric_context: fabric context for TMR req
+ * @fabric_tmr_ptr: fabric context for TMR req
  * @tm_type: Type of TM request
  * @gfp: gfp type for caller
  * @tag: referenced task tag for TMR_ABORT_TASK
@@ -2606,7 +2606,8 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
 }
 EXPORT_SYMBOL(transport_generic_free_cmd);
 
-/* target_get_sess_cmd - Add command to active ->sess_cmd_list
+/**
+ * target_get_sess_cmd - Add command to active ->sess_cmd_list
  * @se_cmd:	command descriptor to add
  * @ack_kref:	Signal that fabric will perform an ack target_put_sess_cmd()
  */
@@ -2800,7 +2801,8 @@ void target_show_cmd(const char *pfx, struct se_cmd *cmd)
 }
 EXPORT_SYMBOL(target_show_cmd);
 
-/* target_sess_cmd_list_set_waiting - Flag all commands in
+/**
+ * target_sess_cmd_list_set_waiting - Flag all commands in
  *         sess_cmd_list to complete cmd_wait_comp.  Set
  *         sess_tearing_down so no more commands are queued.
  * @se_sess:	session to flag
@@ -2835,7 +2837,8 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
 }
 EXPORT_SYMBOL(target_sess_cmd_list_set_waiting);
 
-/* target_wait_for_sess_cmds - Wait for outstanding descriptors
+/**
+ * target_wait_for_sess_cmds - Wait for outstanding descriptors
  * @se_sess:    session to wait for active I/O
  */
 void target_wait_for_sess_cmds(struct se_session *se_sess)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index d6af29250d94994e95bdfdb35d54e53a575d58c8..ae0aea9a3aad1b11e616f6a8ddb57551e7c6acb0 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -42,7 +42,11 @@
 
 #include <linux/target_core_user.h>
 
-/*
+/**
+ * DOC: Userspace I/O
+ * Userspace I/O
+ * -------------
+ *
  * Define a shared-memory interface for LIO to pass SCSI commands and
  * data to userspace for processing. This is to allow backends that
  * are too complex for in-kernel support to be possible.
@@ -53,7 +57,7 @@
  * See the .h file for how the ring is laid out. Note that while the
  * command ring is defined, the particulars of the data area are
  * not. Offset values in the command entry point to other locations
- * internal to the mmap()ed area. There is separate space outside the
+ * internal to the mmap-ed area. There is separate space outside the
  * command ring for data buffers. This leaves maximum flexibility for
  * moving buffer allocations, or even page flipping or other
  * allocation techniques, without altering the command ring layout.
diff --git a/include/linux/log2.h b/include/linux/log2.h
index 41a1ae01099397dc0e6553bbb60155df33448ef4..2af7f77866d035fd125f363eb432cddfdacf83bf 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -72,16 +72,13 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 }
 
 /**
- * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
+ * const_ilog2 - log base 2 of a 32-bit or 64-bit constant unsigned value
  * @n: parameter
  *
- * constant-capable log of base 2 calculation
- * - this can be used to initialise global variables from constant data, hence
- * the massive ternary operator construction
- *
- * selects the appropriately-sized optimised version depending on sizeof(n)
+ * Use this where sparse expects a true constant expression, e.g. for array
+ * indices.
  */
-#define ilog2(n)				\
+#define const_ilog2(n)				\
 (						\
 	__builtin_constant_p(n) ? (		\
 		(n) < 2 ? 0 :			\
@@ -147,10 +144,26 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 		(n) & (1ULL <<  4) ?  4 :	\
 		(n) & (1ULL <<  3) ?  3 :	\
 		(n) & (1ULL <<  2) ?  2 :	\
-		1 ) :				\
-	(sizeof(n) <= 4) ?			\
-	__ilog2_u32(n) :			\
-	__ilog2_u64(n)				\
+		1) :				\
+	-1)
+
+/**
+ * ilog2 - log base 2 of a 32-bit or 64-bit unsigned value
+ * @n: parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+	__builtin_constant_p(n) ?	\
+	const_ilog2(n) :		\
+	(sizeof(n) <= 4) ?		\
+	__ilog2_u32(n) :		\
+	__ilog2_u64(n)			\
  )
 
 /**
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 7ae177c8e3993c0c39ac6539d149509b575cc1ec..4c36af6edd79b60081611d4729ad03570b471fc5 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -15,7 +15,7 @@ struct scsi_cmnd;
 struct scsi_lun;
 struct scsi_sense_hdr;
 
-typedef unsigned int __bitwise blist_flags_t;
+typedef __u64 __bitwise blist_flags_t;
 
 struct scsi_mode_data {
 	__u32	length;
diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
index ea67c32e870e9c330d3d89b1a9a1366077869809..3fdb322d4c4bbeaba31fc0ac0a5a912d924637f6 100644
--- a/include/scsi/scsi_devinfo.h
+++ b/include/scsi/scsi_devinfo.h
@@ -6,55 +6,80 @@
  */
 
 /* Only scan LUN 0 */
-#define BLIST_NOLUN		((__force blist_flags_t)(1 << 0))
+#define BLIST_NOLUN		((__force blist_flags_t)(1ULL << 0))
 /* Known to have LUNs, force scanning.
  * DEPRECATED: Use max_luns=N */
-#define BLIST_FORCELUN		((__force blist_flags_t)(1 << 1))
+#define BLIST_FORCELUN		((__force blist_flags_t)(1ULL << 1))
 /* Flag for broken handshaking */
-#define BLIST_BORKEN		((__force blist_flags_t)(1 << 2))
+#define BLIST_BORKEN		((__force blist_flags_t)(1ULL << 2))
 /* unlock by special command */
-#define BLIST_KEY		((__force blist_flags_t)(1 << 3))
+#define BLIST_KEY		((__force blist_flags_t)(1ULL << 3))
 /* Do not use LUNs in parallel */
-#define BLIST_SINGLELUN		((__force blist_flags_t)(1 << 4))
+#define BLIST_SINGLELUN		((__force blist_flags_t)(1ULL << 4))
 /* Buggy Tagged Command Queuing */
-#define BLIST_NOTQ		((__force blist_flags_t)(1 << 5))
+#define BLIST_NOTQ		((__force blist_flags_t)(1ULL << 5))
 /* Non consecutive LUN numbering */
-#define BLIST_SPARSELUN		((__force blist_flags_t)(1 << 6))
+#define BLIST_SPARSELUN		((__force blist_flags_t)(1ULL << 6))
 /* Avoid LUNS >= 5 */
-#define BLIST_MAX5LUN		((__force blist_flags_t)(1 << 7))
+#define BLIST_MAX5LUN		((__force blist_flags_t)(1ULL << 7))
 /* Treat as (removable) CD-ROM */
-#define BLIST_ISROM		((__force blist_flags_t)(1 << 8))
+#define BLIST_ISROM		((__force blist_flags_t)(1ULL << 8))
 /* LUNs past 7 on a SCSI-2 device */
-#define BLIST_LARGELUN		((__force blist_flags_t)(1 << 9))
+#define BLIST_LARGELUN		((__force blist_flags_t)(1ULL << 9))
 /* override additional length field */
-#define BLIST_INQUIRY_36	((__force blist_flags_t)(1 << 10))
+#define BLIST_INQUIRY_36	((__force blist_flags_t)(1ULL << 10))
+#define __BLIST_UNUSED_11	((__force blist_flags_t)(1ULL << 11))
 /* do not do automatic start on add */
-#define BLIST_NOSTARTONADD	((__force blist_flags_t)(1 << 12))
+#define BLIST_NOSTARTONADD	((__force blist_flags_t)(1ULL << 12))
+#define __BLIST_UNUSED_13	((__force blist_flags_t)(1ULL << 13))
+#define __BLIST_UNUSED_14	((__force blist_flags_t)(1ULL << 14))
+#define __BLIST_UNUSED_15	((__force blist_flags_t)(1ULL << 15))
+#define __BLIST_UNUSED_16	((__force blist_flags_t)(1ULL << 16))
 /* try REPORT_LUNS even for SCSI-2 devs (if HBA supports more than 8 LUNs) */
-#define BLIST_REPORTLUN2	((__force blist_flags_t)(1 << 17))
+#define BLIST_REPORTLUN2	((__force blist_flags_t)(1ULL << 17))
 /* don't try REPORT_LUNS scan (SCSI-3 devs) */
-#define BLIST_NOREPORTLUN	((__force blist_flags_t)(1 << 18))
+#define BLIST_NOREPORTLUN	((__force blist_flags_t)(1ULL << 18))
 /* don't use PREVENT-ALLOW commands */
-#define BLIST_NOT_LOCKABLE	((__force blist_flags_t)(1 << 19))
+#define BLIST_NOT_LOCKABLE	((__force blist_flags_t)(1ULL << 19))
 /* device is actually for RAID config */
-#define BLIST_NO_ULD_ATTACH	((__force blist_flags_t)(1 << 20))
+#define BLIST_NO_ULD_ATTACH	((__force blist_flags_t)(1ULL << 20))
 /* select without ATN */
-#define BLIST_SELECT_NO_ATN	((__force blist_flags_t)(1 << 21))
+#define BLIST_SELECT_NO_ATN	((__force blist_flags_t)(1ULL << 21))
 /* retry HARDWARE_ERROR */
-#define BLIST_RETRY_HWERROR	((__force blist_flags_t)(1 << 22))
+#define BLIST_RETRY_HWERROR	((__force blist_flags_t)(1ULL << 22))
 /* maximum 512 sector cdb length */
-#define BLIST_MAX_512		((__force blist_flags_t)(1 << 23))
+#define BLIST_MAX_512		((__force blist_flags_t)(1ULL << 23))
+#define __BLIST_UNUSED_24	((__force blist_flags_t)(1ULL << 24))
 /* Disable T10 PI (DIF) */
-#define BLIST_NO_DIF		((__force blist_flags_t)(1 << 25))
+#define BLIST_NO_DIF		((__force blist_flags_t)(1ULL << 25))
 /* Ignore SBC-3 VPD pages */
-#define BLIST_SKIP_VPD_PAGES	((__force blist_flags_t)(1 << 26))
+#define BLIST_SKIP_VPD_PAGES	((__force blist_flags_t)(1ULL << 26))
+#define __BLIST_UNUSED_27	((__force blist_flags_t)(1ULL << 27))
 /* Attempt to read VPD pages */
-#define BLIST_TRY_VPD_PAGES	((__force blist_flags_t)(1 << 28))
+#define BLIST_TRY_VPD_PAGES	((__force blist_flags_t)(1ULL << 28))
 /* don't try to issue RSOC */
-#define BLIST_NO_RSOC		((__force blist_flags_t)(1 << 29))
+#define BLIST_NO_RSOC		((__force blist_flags_t)(1ULL << 29))
 /* maximum 1024 sector cdb length */
-#define BLIST_MAX_1024		((__force blist_flags_t)(1 << 30))
+#define BLIST_MAX_1024		((__force blist_flags_t)(1ULL << 30))
 /* Use UNMAP limit for WRITE SAME */
-#define BLIST_UNMAP_LIMIT_WS	((__force blist_flags_t)(1 << 31))
+#define BLIST_UNMAP_LIMIT_WS	((__force blist_flags_t)(1ULL << 31))
+/* Always retry ABORTED_COMMAND with Internal Target Failure */
+#define BLIST_RETRY_ITF		((__force blist_flags_t)(1ULL << 32))
+/* Always retry ABORTED_COMMAND with ASC 0xc1 */
+#define BLIST_RETRY_ASC_C1	((__force blist_flags_t)(1ULL << 33))
+
+#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1
+
+#define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \
+			       (__force blist_flags_t) \
+			       ((__force __u64)__BLIST_LAST_USED - 1ULL)))
+#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_11 | \
+			     __BLIST_UNUSED_13 | \
+			     __BLIST_UNUSED_14 | \
+			     __BLIST_UNUSED_15 | \
+			     __BLIST_UNUSED_16 | \
+			     __BLIST_UNUSED_24 | \
+			     __BLIST_UNUSED_27 | \
+			     __BLIST_HIGH_UNUSED)
 
 #endif
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 0be80f72646b1f86f40a811c86d1946cd4ea59ac..6e299349b15876d3302cc784576dd84cff6f1d66 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -9,21 +9,22 @@
 
 #define TCMU_VERSION "2.0"
 
-/*
+/**
+ * DOC: Ring Design
  * Ring Design
  * -----------
  *
  * The mmaped area is divided into three parts:
- * 1) The mailbox (struct tcmu_mailbox, below)
- * 2) The command ring
- * 3) Everything beyond the command ring (data)
+ * 1) The mailbox (struct tcmu_mailbox, below);
+ * 2) The command ring;
+ * 3) Everything beyond the command ring (data).
  *
  * The mailbox tells userspace the offset of the command ring from the
  * start of the shared memory region, and how big the command ring is.
  *
  * The kernel passes SCSI commands to userspace by putting a struct
  * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
- * userspace via uio's interrupt mechanism.
+ * userspace via UIO's interrupt mechanism.
  *
  * tcmu_cmd_entry contains a header. If the header type is PAD,
  * userspace should skip hdr->length bytes (mod cmdr_size) to find the