diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 1c46f97d6664a850ea286f90b6a36e82d6b73074..ea49834377c8e17b6e8b221e4bd58389a8d7c32e 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	unsigned num_bvecs;
 	sector_t remaining = where->count;
 	struct request_queue *q = bdev_get_queue(where->bdev);
-	sector_t discard_sectors;
+	unsigned short logical_block_size = queue_logical_block_size(q);
+	sector_t num_sectors;
 
 	/*
 	 * where->count may be zero if rw holds a flush and we need to
@@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		/*
 		 * Allocate a suitably sized-bio.
 		 */
-		if (rw & REQ_DISCARD)
+		if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
 			num_bvecs = 1;
 		else
 			num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
@@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		store_io_and_region_in_bio(bio, io, region);
 
 		if (rw & REQ_DISCARD) {
-			discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
-			bio->bi_size = discard_sectors << SECTOR_SHIFT;
-			remaining -= discard_sectors;
+			num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
+			bio->bi_size = num_sectors << SECTOR_SHIFT;
+			remaining -= num_sectors;
+		} else if (rw & REQ_WRITE_SAME) {
+			/*
+			 * WRITE SAME only uses a single page.
+			 */
+			dp->get_page(dp, &page, &len, &offset);
+			bio_add_page(bio, page, logical_block_size, offset);
+			num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
+			bio->bi_size = num_sectors << SECTOR_SHIFT;
+
+			offset = 0;
+			remaining -= num_sectors;
+			dp->next_page(dp);
 		} else while (remaining) {
 			/*
 			 * Try and add as many pages as possible.
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index bed444c93d8d49e03757fdf88c8facb7d98f07dc..68c02673263ba87b9749cda3e119838e75edf22a 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context)
 	struct dm_kcopyd_client *kc = job->kc;
 
 	if (error) {
-		if (job->rw == WRITE)
+		if (job->rw & WRITE)
 			job->write_err |= error;
 		else
 			job->read_err = 1;
@@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context)
 		}
 	}
 
-	if (job->rw == WRITE)
+	if (job->rw & WRITE)
 		push(&kc->complete_jobs, job);
 
 	else {
@@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
 
 		if (r < 0) {
 			/* error this rogue job */
-			if (job->rw == WRITE)
+			if (job->rw & WRITE)
 				job->write_err = (unsigned long) -1L;
 			else
 				job->read_err = 1;
@@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		   unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
 {
 	struct kcopyd_job *job;
+	int i;
 
 	/*
 	 * Allocate an array of jobs consisting of one master job
@@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		memset(&job->source, 0, sizeof job->source);
 		job->source.count = job->dests[0].count;
 		job->pages = &zero_page_list;
-		job->rw = WRITE;
+
+		/*
+		 * Use WRITE SAME to optimize zeroing if all dests support it.
+		 */
+		job->rw = WRITE | REQ_WRITE_SAME;
+		for (i = 0; i < job->num_dests; i++)
+			if (!bdev_write_same(job->dests[i].bdev)) {
+				job->rw = WRITE;
+				break;
+			}
 	}
 
 	job->fn = fn;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fba378f234a58516e0c1a1fd55f4a8bb25f82564..4b940745ba9e3a2b8e73f2cbed17508848cad106 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2779,7 +2779,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 5, 0},
+	.version = {1, 6, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,