Commit 9b06860d authored by Linus Torvalds

Merge tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm and dax updates from Dan Williams:
 "There were multiple touches outside of drivers/nvdimm/ this round to
  add cross-arch compatibility to the devm_memremap_pages() interface,
  enhance numa information for persistent memory ranges, and add a
  zero_page_range() dax operation.

  This cycle I switched from the patchwork api to Konstantin's b4 script
  for collecting tags (from x86, PowerPC, filesystem, and device-mapper
  folks), and everything looks to have gone ok there. This has all
  appeared in -next with no reported issues.

  Summary:

   - Add support for region alignment configuration and enforcement to
     fix compatibility across architectures and PowerPC page size
     configurations.

   - Introduce 'zero_page_range' as a dax operation. This facilitates
     filesystem-dax operation without a block-device.

   - Introduce phys_to_target_node() to facilitate drivers that want to
     know the resulting numa node if a given reserved address range were
     onlined (see the usage sketch after this summary).

   - Advertise a persistence-domain for of_pmem and papr_scm. The
     persistence domain indicates where cpu-store cycles need to reach
     in the platform-memory subsystem before the platform will consider
     them power-fail protected.

   - Promote numa_map_to_online_node() to a cross-kernel generic
     facility.

   - Save x86 numa information to allow for node-id lookups for reserved
     memory ranges, and deploy that capability in the e820-pmem driver.

   - Pick up some miscellaneous minor fixes that missed v5.6-final,
     including some smatch reports in the ioctl path and some unit test
     compilation fixups.

   - Fixup some flexible-array declarations"
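
To make the numa items above concrete: a driver that registers a reserved
persistent-memory range would combine the two new helpers roughly as below
(a sketch; 'res' and 'ndr_desc' stand in for the driver's own resource and
region descriptor, compare the papr_scm hunk in the diff):

        /* resolve the node this range would belong to if onlined */
        int target_nid = phys_to_target_node(res->start);

        ndr_desc.target_node = target_nid;
        /* allocations today still need the nearest *online* node */
        ndr_desc.numa_node = numa_map_to_online_node(target_nid);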

* tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (29 commits)
  dax: Move mandatory ->zero_page_range() check in alloc_dax()
  dax,iomap: Add helper dax_iomap_zero() to zero a range
  dax: Use new dax zero page method for zeroing a page
  dm,dax: Add dax zero_page_range operation
  s390,dcssblk,dax: Add dax zero_page_range operation to dcssblk driver
  dax, pmem: Add a dax operation zero_page_range
  pmem: Add functions for reading/writing page to/from pmem
  libnvdimm: Update persistence domain value for of_pmem and papr_scm device
  tools/test/nvdimm: Fix out of tree build
  libnvdimm/region: Fix build error
  libnvdimm/region: Replace zero-length array with flexible-array member
  libnvdimm/label: Replace zero-length array with flexible-array member
  ACPI: NFIT: Replace zero-length array with flexible-array member
  libnvdimm/region: Introduce an 'align' attribute
  libnvdimm/region: Introduce NDD_LABELING
  libnvdimm/namespace: Enforce memremap_compat_align()
  libnvdimm/pfn: Prevent raw mode fallback if pfn-infoblock valid
  libnvdimm: Out of bounds read in __nd_ioctl()
  acpi/nfit: improve bounds checking for 'func'
  mm/memremap_pages: Introduce memremap_compat_align()
  ...
parents 0906d8b9 f6d2b802

--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9670,6 +9670,7 @@ F: drivers/acpi/nfit/*
 F: include/linux/nd.h
 F: include/linux/libnvdimm.h
 F: include/uapi/linux/ndctl.h
+F: tools/testing/nvdimm/
 
 LICENSES and SPDX stuff
 M: Thomas Gleixner <tglx@linutronix.de>

--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -122,6 +122,7 @@ config PPC
         select ARCH_HAS_GCOV_PROFILE_ALL
         select ARCH_HAS_KCOV
         select ARCH_HAS_HUGEPD if HUGETLB_PAGE
+        select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
         select ARCH_HAS_MMIOWB if PPC64
         select ARCH_HAS_PHYS_TO_DMA
         select ARCH_HAS_PMEM_API

--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -2,6 +2,7 @@
 
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/mmzone.h>
 #include <linux/vmalloc.h>
 
 #include <asm/io-workarounds.h>
@@ -97,3 +98,23 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
 
         return NULL;
 }
+
+#ifdef CONFIG_ZONE_DEVICE
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+ */
+unsigned long memremap_compat_align(void)
+{
+        unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+
+        if (radix_enabled())
+                return SUBSECTION_SIZE;
+        return max(SUBSECTION_SIZE, 1UL << shift);
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
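
The override above exists because the new ARCH_HAS_MEMREMAP_COMPAT_ALIGN
option pairs with a generic fallback in mm/memremap.c; that fallback is not
shown in this diff, but it presumably reduces to the subsection size, along
the lines of:

        /* sketch of the generic fallback an arch can override */
        #ifndef CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN
        unsigned long memremap_compat_align(void)
        {
                return SUBSECTION_SIZE;
        }
        EXPORT_SYMBOL_GPL(memremap_compat_align);
        #endif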

--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -286,25 +286,6 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
         return 0;
 }
 
-static inline int papr_scm_node(int node)
-{
-        int min_dist = INT_MAX, dist;
-        int nid, min_node;
-
-        if ((node == NUMA_NO_NODE) || node_online(node))
-                return node;
-
-        min_node = first_online_node;
-        for_each_online_node(nid) {
-                dist = node_distance(node, nid);
-                if (dist < min_dist) {
-                        min_dist = dist;
-                        min_node = nid;
-                }
-        }
-        return min_node;
-}
-
 static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 {
         struct device *dev = &p->pdev->dev;
@@ -329,7 +310,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
         }
 
         dimm_flags = 0;
-        set_bit(NDD_ALIASING, &dimm_flags);
+        set_bit(NDD_LABELING, &dimm_flags);
 
         p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
                                   PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
@@ -350,7 +331,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 
         memset(&ndr_desc, 0, sizeof(ndr_desc));
         target_nid = dev_to_node(&p->pdev->dev);
-        online_nid = papr_scm_node(target_nid);
+        online_nid = numa_map_to_online_node(target_nid);
         ndr_desc.numa_node = online_nid;
         ndr_desc.target_node = target_nid;
         ndr_desc.res = &p->res;
@@ -362,8 +343,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 
         if (p->is_volatile)
                 p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
-        else
+        else {
+                set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
                 p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+        }
         if (!p->region) {
                 dev_err(dev, "Error registering region %pR from %pOF\n",
                         ndr_desc.res, p->dn);
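
The papr_scm_node() loop removed above is not lost: per the summary,
numa_map_to_online_node() is promoted to a generic kernel facility with
essentially the same nearest-node walk. A sketch mirroring the deleted code:

        int numa_map_to_online_node(int node)
        {
                int min_dist = INT_MAX, dist, n, min_node;

                if (node == NUMA_NO_NODE || node_online(node))
                        return node;

                min_node = node;
                for_each_online_node(n) {
                        dist = node_distance(node, n);
                        if (dist < min_dist) {
                                min_dist = dist;
                                min_node = n;
                        }
                }
                return min_node;
        }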

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1661,6 +1661,7 @@ config X86_PMEM_LEGACY
         depends on PHYS_ADDR_T_64BIT
         depends on BLK_DEV
         select X86_PMEM_LEGACY_DEVICE
+        select NUMA_KEEP_MEMINFO if NUMA
         select LIBNVDIMM
         help
           Treat memory marked using the non-standard e820 type of 12 as used

--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
+static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
+static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
 
 static int numa_distance_cnt;
 static u8 *numa_distance;
@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
                 (mi->nr_blks - idx) * sizeof(mi->blk[0]));
 }
 
+/**
+ * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
+ * @dst: numa_meminfo to append block to
+ * @idx: Index of memblk to remove
+ * @src: numa_meminfo to remove memblk from
+ */
+static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
+                                         struct numa_meminfo *src)
+{
+        dst->blk[dst->nr_blks++] = src->blk[idx];
+        numa_remove_memblk_from(idx, src);
+}
+
 /**
  * numa_add_memblk - Add one numa_memblk to numa_meminfo
  * @nid: NUMA node ID of the new memblk
@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
         for (i = 0; i < mi->nr_blks; i++) {
                 struct numa_memblk *bi = &mi->blk[i];
 
-                /* make sure all blocks are inside the limits */
+                /* move / save reserved memory ranges */
+                if (!memblock_overlaps_region(&memblock.memory,
+                                        bi->start, bi->end - bi->start)) {
+                        numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
+                        continue;
+                }
+
+                /* make sure all non-reserved blocks are inside the limits */
                 bi->start = max(bi->start, low);
                 bi->end = min(bi->end, high);
 
-                /* and there's no empty or non-exist block */
-                if (bi->start >= bi->end ||
-                    !memblock_overlaps_region(&memblock.memory,
-                        bi->start, bi->end - bi->start))
+                /* and there's no empty block */
+                if (bi->start >= bi->end)
                         numa_remove_memblk_from(i--, mi);
         }
@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
 
 #endif  /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
 {
-        struct numa_meminfo *mi = &numa_meminfo;
-        int nid = mi->blk[0].nid;
         int i;
 
         for (i = 0; i < mi->nr_blks; i++)
                 if (mi->blk[i].start <= start && mi->blk[i].end > start)
-                        nid = mi->blk[i].nid;
+                        return mi->blk[i].nid;
+        return NUMA_NO_NODE;
+}
+
+int phys_to_target_node(phys_addr_t start)
+{
+        int nid = meminfo_to_nid(&numa_meminfo, start);
+
+        /*
+         * Prefer online nodes, but if reserved memory might be
+         * hot-added continue the search with reserved ranges.
+         */
+        if (nid != NUMA_NO_NODE)
+                return nid;
+
+        return meminfo_to_nid(&numa_reserved_meminfo, start);
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+        int nid = meminfo_to_nid(&numa_meminfo, start);
+
+        if (nid == NUMA_NO_NODE)
+                nid = numa_meminfo.blk[0].nid;
         return nid;
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
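
Note the two different contracts implemented above:
memory_add_physaddr_to_nid() must always return a usable node (hence the
blk[0].nid fallback), while phys_to_target_node() may return NUMA_NO_NODE
and additionally searches the new numa_reserved_meminfo. A hypothetical
caller for a reserved pmem range:

        /* prefer the firmware-described target node; NUMA_NO_NODE
         * means the firmware tables described none */
        int nid = phys_to_target_node(start);

        if (nid == NUMA_NO_NODE)
                nid = memory_add_physaddr_to_nid(start);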

--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -360,7 +360,7 @@ static union acpi_object *acpi_label_info(acpi_handle handle)
 
 static u8 nfit_dsm_revid(unsigned family, unsigned func)
 {
-        static const u8 revid_table[NVDIMM_FAMILY_MAX+1][32] = {
+        static const u8 revid_table[NVDIMM_FAMILY_MAX+1][NVDIMM_CMD_MAX+1] = {
                 [NVDIMM_FAMILY_INTEL] = {
                         [NVDIMM_INTEL_GET_MODES] = 2,
                         [NVDIMM_INTEL_GET_FWINFO] = 2,
@@ -386,7 +386,7 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func)
 
         if (family > NVDIMM_FAMILY_MAX)
                 return 0;
-        if (func > 31)
+        if (func > NVDIMM_CMD_MAX)
                 return 0;
         id = revid_table[family][func];
         if (id == 0)
@@ -492,7 +492,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
          * Check for a valid command. For ND_CMD_CALL, we also have to
          * make sure that the DSM function is supported.
          */
-        if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask))
+        if (cmd == ND_CMD_CALL &&
+            (func > NVDIMM_CMD_MAX || !test_bit(func, &dsm_mask)))
                 return -ENOTTY;
         else if (!test_bit(cmd, &cmd_mask))
                 return -ENOTTY;
@@ -2026,8 +2027,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
                         continue;
                 }
 
-                if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+                if (nfit_mem->bdw && nfit_mem->memdev_pmem) {
                         set_bit(NDD_ALIASING, &flags);
+                        set_bit(NDD_LABELING, &flags);
+                }
 
                 /* collate flags across all memdevs for this dimm */
                 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -3492,7 +3495,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
         if (nvdimm && cmd == ND_CMD_CALL &&
                         call_pkg->nd_family == NVDIMM_FAMILY_INTEL) {
                 func = call_pkg->nd_command;
-                if ((1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
+                if (func > NVDIMM_CMD_MAX ||
+                    (1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
                         return -EOPNOTSUPP;
         }

--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -34,6 +34,7 @@
                 | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
 
 #define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV
+#define NVDIMM_CMD_MAX 31
 
 #define NVDIMM_STANDARD_CMDMASK \
 (1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
@@ -144,32 +145,32 @@ struct nfit_spa {
         unsigned long ars_state;
         u32 clear_err_unit;
         u32 max_ars;
-        struct acpi_nfit_system_address spa[0];
+        struct acpi_nfit_system_address spa[];
 };
 
 struct nfit_dcr {
         struct list_head list;
-        struct acpi_nfit_control_region dcr[0];
+        struct acpi_nfit_control_region dcr[];
 };
 
 struct nfit_bdw {
         struct list_head list;
-        struct acpi_nfit_data_region bdw[0];
+        struct acpi_nfit_data_region bdw[];
 };
 
 struct nfit_idt {
         struct list_head list;
-        struct acpi_nfit_interleave idt[0];
+        struct acpi_nfit_interleave idt[];
 };
 
 struct nfit_flush {
         struct list_head list;
-        struct acpi_nfit_flush_address flush[0];
+        struct acpi_nfit_flush_address flush[];
 };
 
 struct nfit_memdev {
         struct list_head list;
-        struct acpi_nfit_memory_map memdev[0];
+        struct acpi_nfit_memory_map memdev[];
 };
 
 enum nfit_mem_flags {
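
The [0]-to-[] conversions above are mechanical: a C99 flexible array member
is excluded from sizeof(), so the usual trailing-data allocation idiom keeps
working while the compiler regains the ability to diagnose misuse (e.g.
taking sizeof of the member, or placing it anywhere but last). A sketch of
the unchanged allocation pattern, modeled on the nfit code:

        struct nfit_dcr *nfit_dcr;

        /* sizeof(*nfit_dcr) covers the header only, never dcr[] */
        nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
                        GFP_KERNEL);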

--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
 }
 EXPORT_SYMBOL(acpi_map_pxm_to_node);
 
-/**
- * acpi_map_pxm_to_online_node - Map proximity ID to online node
- * @pxm: ACPI proximity ID
- *
- * This is similar to acpi_map_pxm_to_node(), but always returns an online
- * node. When the mapped node from a given proximity ID is offline, it
- * looks up the node distance table and returns the nearest online node.
- *
- * ACPI device drivers, which are called after the NUMA initialization has
- * completed in the kernel, can call this interface to obtain their device
- * NUMA topology from ACPI tables. Such drivers do not have to deal with
- * offline nodes. A node may be offline when a device proximity ID is
- * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
- * "numa=off" on x86.
- */
-int acpi_map_pxm_to_online_node(int pxm)
-{
-        int node, min_node;
-
-        node = acpi_map_pxm_to_node(pxm);
-        if (node == NUMA_NO_NODE)
-                node = 0;
-
-        min_node = node;
-        if (!node_online(node)) {
-                int min_dist = INT_MAX, dist, n;
-
-                for_each_online_node(n) {
-                        dist = node_distance(node, n);
-                        if (dist < min_dist) {
-                                min_dist = dist;
-                                min_node = n;
-                        }
-                }
-        }
-
-        return min_node;
-}
-EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
-
 static void __init
 acpi_table_print_srat_entry(struct acpi_subtable_header *header)
 {
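
With numa_map_to_online_node() generic, the exported function deleted above
can shrink to a trivial wrapper for its remaining callers, presumably along
the lines of:

        static inline int acpi_map_pxm_to_online_node(int pxm)
        {
                return numa_map_to_online_node(acpi_map_pxm_to_node(pxm));
        }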

--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -421,8 +421,10 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
          * device outside of mmap of the resulting character device.
          */
         dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
-        if (!dax_dev)
+        if (IS_ERR(dax_dev)) {
+                rc = PTR_ERR(dax_dev);
                 goto err;
+        }
 
         /* a device_dax instance is dead while the driver is not attached */
         kill_dax(dax_dev);

--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -344,6 +344,23 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 }
 EXPORT_SYMBOL_GPL(dax_copy_to_iter);
 
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+                        size_t nr_pages)
+{
+        if (!dax_alive(dax_dev))
+                return -ENXIO;
+        /*
+         * There are no callers that want to zero more than one page as of now.
+         * Once users are there, this check can be removed after the
+         * device mapper code has been updated to split ranges across targets.
+         */
+        if (nr_pages != 1)
+                return -EIO;
+
+        return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
+}
+EXPORT_SYMBOL_GPL(dax_zero_page_range);
+
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 void arch_wb_cache_pmem(void *addr, size_t size);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
@@ -551,9 +568,16 @@ struct dax_device *alloc_dax(void *private, const char *__host,
         dev_t devt;
         int minor;
 
+        if (ops && !ops->zero_page_range) {
+                pr_debug("%s: error: device does not provide dax"
+                         " operation zero_page_range()\n",
+                         __host ? __host : "Unknown");
+                return ERR_PTR(-EINVAL);
+        }
+
         host = kstrdup(__host, GFP_KERNEL);
         if (__host && !host)
-                return NULL;
+                return ERR_PTR(-ENOMEM);
 
         minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
         if (minor < 0)
@@ -576,7 +600,7 @@ struct dax_device *alloc_dax(void *private, const char *__host,
         ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
         kfree(host);
-        return NULL;
+        return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL_GPL(alloc_dax);
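
Every dax_operations provider must now supply the op that alloc_dax()
rejects NULL for above. Going by the "dax, pmem: Add a dax operation
zero_page_range" and "pmem: Add functions for reading/writing page to/from
pmem" commits in this pull, the pmem provider is roughly:

        static int pmem_dax_zero_page_range(struct dax_device *dax_dev,
                        pgoff_t pgoff, size_t nr_pages)
        {
                struct pmem_device *pmem = dax_get_private(dax_dev);

                /* reuse the pmem write path with the zero page as source */
                return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
                                PFN_PHYS(pgoff) >> SECTOR_SHIFT,
                                PAGE_SIZE * nr_pages));
        }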

--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -201,10 +201,27 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
         return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+                                      size_t nr_pages)
+{
+        int ret;
+        struct linear_c *lc = ti->private;
+        struct block_device *bdev = lc->dev->bdev;
+        struct dax_device *dax_dev = lc->dev->dax_dev;
+        sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+        dev_sector = linear_map_sector(ti, sector);
+        ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
+        if (ret)
+                return ret;
+        return dax_zero_page_range(dax_dev, pgoff, nr_pages);
+}
+
 #else
 #define linear_dax_direct_access NULL
 #define linear_dax_copy_from_iter NULL
 #define linear_dax_copy_to_iter NULL
+#define linear_dax_zero_page_range NULL
 #endif
 
 static struct target_type linear_target = {
@@ -226,6 +243,7 @@ static struct target_type linear_target = {
         .direct_access = linear_dax_direct_access,
         .dax_copy_from_iter = linear_dax_copy_from_iter,
         .dax_copy_to_iter = linear_dax_copy_to_iter,
+        .dax_zero_page_range = linear_dax_zero_page_range,
 };
 
 int __init dm_linear_init(void)

--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -994,10 +994,26 @@ static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
         return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
 
+static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+                                          size_t nr_pages)
+{
+        int ret;
+        struct log_writes_c *lc = ti->private;
+        sector_t sector = pgoff * PAGE_SECTORS;
+
+        ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
+                             &pgoff);
+        if (ret)
+                return ret;
+        return dax_zero_page_range(lc->dev->dax_dev, pgoff,
+                                   nr_pages << PAGE_SHIFT);
+}
+
 #else
 #define log_writes_dax_direct_access NULL