Commit 98cc093c authored by Huang Ying's avatar Huang Ying Committed by Linus Torvalds

block, THP: make block_device_operations.rw_page support THP

The .rw_page in struct block_device_operations is used by the swap
subsystem to read/write the page contents from/into the corresponding
swap slot in the swap device.  To support the THP (Transparent Huge
Page) swap optimization, the .rw_page is enhanced to support to
read/write THP if possible.

Link: http://lkml.kernel.org/r/20170724051840.2309-6-ying.huang@intel.comSigned-off-by: default avatar"Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@intel.com> [for brd.c, zram_drv.c, pmem.c]
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal L Verma <vishal.l.verma@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent f0eea189
......@@ -326,7 +326,11 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct brd_device *brd = bdev->bd_disk->private_data;
int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
int err;
if (PageTransHuge(page))
return -ENOTSUPP;
err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
page_endio(page, is_write, err);
return err;
}
......
......@@ -1285,6 +1285,8 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct zram *zram;
struct bio_vec bv;
if (PageTransHuge(page))
return -ENOTSUPP;
zram = bdev->bd_disk->private_data;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
......
......@@ -1241,8 +1241,10 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector,
{
struct btt *btt = bdev->bd_disk->private_data;
int rc;
unsigned int len;
rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
len = hpage_nr_pages(page) * PAGE_SIZE;
rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector);
if (rc == 0)
page_endio(page, is_write, 0);
......
......@@ -80,22 +80,40 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
static void write_pmem(void *pmem_addr, struct page *page,
unsigned int off, unsigned int len)
{
void *mem = kmap_atomic(page);
memcpy_flushcache(pmem_addr, mem + off, len);
kunmap_atomic(mem);
unsigned int chunk;
void *mem;
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE);
memcpy_flushcache(pmem_addr, mem + off, chunk);
kunmap_atomic(mem);
len -= chunk;
off = 0;
page++;
pmem_addr += PAGE_SIZE;
}
}
static blk_status_t read_pmem(struct page *page, unsigned int off,
void *pmem_addr, unsigned int len)
{
unsigned int chunk;
int rc;
void *mem = kmap_atomic(page);
rc = memcpy_mcsafe(mem + off, pmem_addr, len);
kunmap_atomic(mem);
if (rc)
return BLK_STS_IOERR;
void *mem;
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE);
rc = memcpy_mcsafe(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rc)
return BLK_STS_IOERR;
len -= chunk;
off = 0;
page++;
pmem_addr += PAGE_SIZE;
}
return BLK_STS_OK;
}
......@@ -188,7 +206,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct pmem_device *pmem = bdev->bd_queue->queuedata;
blk_status_t rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
0, is_write, sector);
/*
* The ->rw_page interface is subtle and tricky. The core
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment