diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index e3cd4493d81d3941060591157c10fa1964163e13..cd0d69fa7592e64d6ac564eafac8ee622ea7ccfc 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -25,7 +25,7 @@ struct iommu_arena {
 };
 
 struct iommu {
-	struct iommu_table	tbl;
+	struct iommu_map_table	tbl;
 	spinlock_t		lock;
 	u32			dma_addr_mask;
 	iopte_t			*page_table;
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 9b16b341b6ae6d02627d185fe927bbd87ca64f56..5320689c06e97a5731825b5c2b45d955ef340af3 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -13,15 +13,12 @@
 #include <linux/errno.h>
 #include <linux/iommu-helper.h>
 #include <linux/bitmap.h>
-#include <linux/hash.h>
 #include <linux/iommu-common.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
 #endif
 
-static	DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
-
 #include <asm/iommu.h>
 
 #include "iommu_common.h"
@@ -49,9 +46,9 @@ static	DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
 			       "i" (ASI_PHYS_BYPASS_EC_E))
 
 /* Must be invoked under the IOMMU lock. */
-static void iommu_flushall(struct iommu_table *iommu_table)
+static void iommu_flushall(struct iommu_map_table *iommu_map_table)
 {
-	struct iommu *iommu = container_of(iommu_table, struct iommu, tbl);
+	struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
 	if (iommu->iommu_flushinv) {
 		iommu_write(iommu->iommu_flushinv, ~(u64)0);
 	} else {
@@ -92,23 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
 	iopte_val(*iopte) = val;
 }
 
-static struct iommu_tbl_ops iommu_sparc_ops = {
-	.reset	= iommu_flushall
-};
-
-static void setup_iommu_pool_hash(void)
-{
-	unsigned int i;
-	static bool do_once;
-
-	if (do_once)
-		return;
-	do_once = true;
-	for_each_possible_cpu(i)
-		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
-}
-
-
 int iommu_table_init(struct iommu *iommu, int tsbsize,
 		     u32 dma_offset, u32 dma_addr_mask,
 		     int numa_node)
@@ -121,7 +101,7 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
 	iommu->ctx_lowest_free = 1;
-	iommu->tbl.page_table_map_base = dma_offset;
+	iommu->tbl.table_map_base = dma_offset;
 	iommu->dma_addr_mask = dma_addr_mask;
 
 	/* Allocate and initialize the free area map.  */
@@ -131,12 +111,10 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	if (!iommu->tbl.map)
 		return -ENOMEM;
 	memset(iommu->tbl.map, 0, sz);
-	if (tlb_type != hypervisor)
-		iommu_sparc_ops.reset = NULL; /* not needed on on sun4v */
 
-	setup_iommu_pool_hash();
 	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
-			    &iommu_sparc_ops, false, 1);
+			    (tlb_type != hypervisor ? iommu_flushall : NULL),
+			    false, 1, false);
 
 	/* Allocate and initialize the dummy page which we
 	 * set inactive IO PTEs to point to.
@@ -182,7 +160,7 @@ static inline iopte_t *alloc_npages(struct device *dev,
 	unsigned long entry;
 
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
-				      __this_cpu_read(iommu_pool_hash));
+				      (unsigned long)(-1), 0);
 	if (unlikely(entry == DMA_ERROR_CODE))
 		return NULL;
 
@@ -249,7 +227,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 		return NULL;
 	}
 
-	*dma_addrp = (iommu->tbl.page_table_map_base +
+	*dma_addrp = (iommu->tbl.table_map_base +
 		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = (void *) first_page;
 	npages = size >> IO_PAGE_SHIFT;
@@ -275,7 +253,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	iommu = dev->archdata.iommu;
 
-	iommu_tbl_range_free(&iommu->tbl, dvma, npages, false, NULL);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
 
 	order = get_order(size);
 	if (order < 10)
@@ -315,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
 	if (unlikely(!base))
 		goto bad;
 
-	bus_addr = (iommu->tbl.page_table_map_base +
+	bus_addr = (iommu->tbl.table_map_base +
 		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
@@ -426,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 	base = iommu->page_table +
-		((bus_addr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT);
+		((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
 	bus_addr &= IO_PAGE_MASK;
 
 	spin_lock_irqsave(&iommu->lock, flags);
@@ -448,8 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	iommu_free_ctx(iommu, ctx);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages,
-			     false, NULL);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
 }
 
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -497,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 	max_seg_size = dma_get_max_seg_size(dev);
 	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
-	base_shift = iommu->tbl.page_table_map_base >> IO_PAGE_SHIFT;
+	base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long paddr, npages, entry, out_entry = 0, slen;
 		iopte_t *base;
@@ -511,8 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 		/* Allocate iommu entries for that segment */
 		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
 		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
-		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, &handle,
-					      __this_cpu_read(iommu_pool_hash));
+		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
 		if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -525,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 		base = iommu->page_table + entry;
 
 		/* Convert entry to a dma_addr_t */
-		dma_addr = iommu->tbl.page_table_map_base +
+		dma_addr = iommu->tbl.table_map_base +
 			(entry << IO_PAGE_SHIFT);
 		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
@@ -586,7 +563,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
 
-			entry = (vaddr - iommu->tbl.page_table_map_base)
+			entry = (vaddr - iommu->tbl.table_map_base)
 				>> IO_PAGE_SHIFT;
 			base = iommu->page_table + entry;
 
@@ -594,7 +571,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 				iopte_make_dummy(iommu, base + j);
 
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
-					     false, NULL);
+					     DMA_ERROR_CODE);
 
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -610,19 +587,18 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 /* If contexts are being used, they are the same in all of the mappings
  * we make for a particular SG.
  */
-static unsigned long fetch_sg_ctx(struct iommu *iommu,
-				  struct scatterlist *sg)
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
 {
 	unsigned long ctx = 0;
 
 	if (iommu->iommu_ctxflush) {
 		iopte_t *base;
 		u32 bus_addr;
-		struct iommu_table *tbl = &iommu->tbl;
+		struct iommu_map_table *tbl = &iommu->tbl;
 
 		bus_addr = sg->dma_address & IO_PAGE_MASK;
 		base = iommu->page_table +
-		       ((bus_addr - tbl->page_table_map_base) >> IO_PAGE_SHIFT);
+			((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);
 
 		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
 	}
@@ -659,7 +635,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			break;
 		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
 
-		entry = ((dma_handle - iommu->tbl.page_table_map_base)
+		entry = ((dma_handle - iommu->tbl.table_map_base)
 			 >> IO_PAGE_SHIFT);
 		base = iommu->page_table + entry;
 
@@ -671,8 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		for (i = 0; i < npages; i++)
 			iopte_make_dummy(iommu, base + i);
 
-		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, false,
-				     NULL);
+		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+				     DMA_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
@@ -706,10 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
 	if (iommu->iommu_ctxflush &&
 	    strbuf->strbuf_ctxflush) {
 		iopte_t *iopte;
-		struct iommu_table *tbl = &iommu->tbl;
+		struct iommu_map_table *tbl = &iommu->tbl;
 
 		iopte = iommu->page_table +
-			((bus_addr - tbl->page_table_map_base)>>IO_PAGE_SHIFT);
+			((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
 		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
 	}
 
@@ -742,10 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
 	if (iommu->iommu_ctxflush &&
 	    strbuf->strbuf_ctxflush) {
 		iopte_t *iopte;
-		struct iommu_table *tbl = &iommu->tbl;
+		struct iommu_map_table *tbl = &iommu->tbl;
 
 		iopte = iommu->page_table + ((sglist[0].dma_address -
-			tbl->page_table_map_base) >> IO_PAGE_SHIFT);
+			tbl->table_map_base) >> IO_PAGE_SHIFT);
 		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
 	}
 
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index d485697c37c0d61fbfc97ebf0f1c6755abd16020..d2ae0f70059ead513bde0ff52ffa02de6615d9d8 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -15,7 +15,6 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/bitmap.h>
-#include <linux/hash.h>
 #include <linux/iommu-common.h>
 
 #include <asm/hypervisor.h>
@@ -32,7 +31,6 @@
 #define COOKIE_PGSZ_CODE	0xf000000000000000ULL
 #define COOKIE_PGSZ_CODE_SHIFT	60ULL
 
-static DEFINE_PER_CPU(unsigned int, ldc_pool_hash);
 
 static char version[] =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -108,7 +106,7 @@ struct ldc_iommu {
 	/* Protects ldc_unmap.  */
 	spinlock_t			lock;
 	struct ldc_mtable_entry		*page_table;
-	struct iommu_table		iommu_table;
+	struct iommu_map_table		iommu_map_table;
 };
 
 struct ldc_channel {
@@ -1015,18 +1013,9 @@ static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
 	return (cookie >> (13ULL + (szcode * 3ULL)));
 }
 
-struct ldc_demap_arg {
-	struct ldc_iommu *ldc_iommu;
-	u64 cookie;
-	unsigned long id;
-};
-
-static void ldc_demap(void *arg, unsigned long entry, unsigned long npages)
+static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
+		      unsigned long entry, unsigned long npages)
 {
-	struct ldc_demap_arg *ldc_demap_arg = arg;
-	struct ldc_iommu *iommu = ldc_demap_arg->ldc_iommu;
-	unsigned long id = ldc_demap_arg->id;
-	u64 cookie = ldc_demap_arg->cookie;
 	struct ldc_mtable_entry *base;
 	unsigned long i, shift;
 
@@ -1043,36 +1032,17 @@ static void ldc_demap(void *arg, unsigned long entry, unsigned long npages)
 /* XXX Make this configurable... XXX */
 #define LDC_IOTABLE_SIZE	(8 * 1024)
 
-struct iommu_tbl_ops ldc_iommu_ops = {
-	.cookie_to_index = ldc_cookie_to_index,
-	.demap = ldc_demap,
-};
-
-static void setup_ldc_pool_hash(void)
-{
-	unsigned int i;
-	static bool do_once;
-
-	if (do_once)
-		return;
-	do_once = true;
-	for_each_possible_cpu(i)
-		per_cpu(ldc_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
-}
-
-
 static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
 {
 	unsigned long sz, num_tsb_entries, tsbsize, order;
 	struct ldc_iommu *ldc_iommu = &lp->iommu;
-	struct iommu_table *iommu = &ldc_iommu->iommu_table;
+	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
 	struct ldc_mtable_entry *table;
 	unsigned long hv_err;
 	int err;
 
 	num_tsb_entries = LDC_IOTABLE_SIZE;
 	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
-	setup_ldc_pool_hash();
 	spin_lock_init(&ldc_iommu->lock);
 
 	sz = num_tsb_entries / 8;
@@ -1083,7 +1053,9 @@ static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
 		return -ENOMEM;
 	}
 	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
-			    &ldc_iommu_ops, false, 1);
+			    NULL, false /* no large pool */,
+			    1 /* npools */,
+			    true /* skip span boundary check */);
 
 	order = get_order(tsbsize);
 
@@ -1122,7 +1094,7 @@ static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
 static void ldc_iommu_release(struct ldc_channel *lp)
 {
 	struct ldc_iommu *ldc_iommu = &lp->iommu;
-	struct iommu_table *iommu = &ldc_iommu->iommu_table;
+	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
 	unsigned long num_tsb_entries, tsbsize, order;
 
 	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
@@ -1979,8 +1951,8 @@ static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
 {
 	long entry;
 
-	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_table, npages,
-				     NULL, __this_cpu_read(ldc_pool_hash));
+	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
+				      npages, NULL, (unsigned long)-1, 0);
 	if (unlikely(entry < 0))
 		return NULL;
 
@@ -2191,17 +2163,13 @@ EXPORT_SYMBOL(ldc_map_single);
 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
 			u64 cookie, u64 size)
 {
-	unsigned long npages;
-	struct ldc_demap_arg demap_arg;
-
-	demap_arg.ldc_iommu = iommu;
-	demap_arg.cookie = cookie;
-	demap_arg.id = id;
+	unsigned long npages, entry;
 
 	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
-	iommu_tbl_range_free(&iommu->iommu_table, cookie, npages, true,
-			     &demap_arg);
 
+	entry = ldc_cookie_to_index(cookie, iommu);
+	ldc_demap(iommu, id, cookie, entry, npages);
+	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
 }
 
 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 9b76b9d639e1c0e857bbc74e2643113081b68b2e..d2fe57dad433ea9409c52a3aaf8ae9efa2113032 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -15,7 +15,6 @@
 #include <linux/export.h>
 #include <linux/log2.h>
 #include <linux/of_device.h>
-#include <linux/hash.h>
 #include <linux/iommu-common.h>
 
 #include <asm/iommu.h>
@@ -30,7 +29,6 @@
 
 #define DRIVER_NAME	"pci_sun4v"
 #define PFX		DRIVER_NAME ": "
-static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
 
 static unsigned long vpci_major = 1;
 static unsigned long vpci_minor = 1;
@@ -159,13 +157,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 	iommu = dev->archdata.iommu;
 
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
-				      __this_cpu_read(iommu_pool_hash));
+				      (unsigned long)(-1), 0);
 
 	if (unlikely(entry == DMA_ERROR_CODE))
 		goto range_alloc_fail;
 
-	*dma_addrp = (iommu->tbl.page_table_map_base +
-		      (entry << IO_PAGE_SHIFT));
+	*dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
 	ret = (void *) first_page;
 	first_page = __pa(first_page);
 
@@ -190,7 +187,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 	return ret;
 
 iommu_map_fail:
-	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, false, NULL);
+	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE);
 
 range_alloc_fail:
 	free_pages(first_page, order);
@@ -227,9 +224,9 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 	iommu = dev->archdata.iommu;
 	pbm = dev->archdata.host_controller;
 	devhandle = pbm->devhandle;
-	entry = ((dvma - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT);
+	entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
 	dma_4v_iommu_demap(&devhandle, entry, npages);
-	iommu_tbl_range_free(&iommu->tbl, dvma, npages, false, NULL);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
 	order = get_order(size);
 	if (order < 10)
 		free_pages((unsigned long)cpu, order);
@@ -257,13 +254,12 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
 	npages >>= IO_PAGE_SHIFT;
 
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
-				      __this_cpu_read(iommu_pool_hash));
+				      (unsigned long)(-1), 0);
 
 	if (unlikely(entry == DMA_ERROR_CODE))
 		goto bad;
 
-	bus_addr = (iommu->tbl.page_table_map_base +
-		    (entry << IO_PAGE_SHIFT));
+	bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
 	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
 	prot = HV_PCI_MAP_ATTR_READ;
@@ -292,7 +288,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
 	return DMA_ERROR_CODE;
 
 iommu_map_fail:
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, false, NULL);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
 	return DMA_ERROR_CODE;
 }
 
@@ -319,9 +315,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 	bus_addr &= IO_PAGE_MASK;
-	entry = (bus_addr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT;
+	entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
 	dma_4v_iommu_demap(&devhandle, entry, npages);
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, false, NULL);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
 }
 
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -363,7 +359,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 	max_seg_size = dma_get_max_seg_size(dev);
 	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
-	base_shift = iommu->tbl.page_table_map_base >> IO_PAGE_SHIFT;
+	base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long paddr, npages, entry, out_entry = 0, slen;
 
@@ -376,8 +372,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 		/* Allocate iommu entries for that segment */
 		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
 		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
-		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, &handle,
-				      __this_cpu_read(iommu_pool_hash));
+		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
 		if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -390,8 +386,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 		iommu_batch_new_entry(entry);
 
 		/* Convert entry to a dma_addr_t */
-		dma_addr = iommu->tbl.page_table_map_base +
-			(entry << IO_PAGE_SHIFT);
+		dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
 		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
 		/* Insert into HW table */
@@ -456,7 +451,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
-					     false, NULL);
+					     DMA_ERROR_CODE);
 			/* XXX demap? XXX */
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -492,16 +487,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		dma_addr_t dma_handle = sg->dma_address;
 		unsigned int len = sg->dma_length;
 		unsigned long npages;
-		struct iommu_table *tbl = &iommu->tbl;
+		struct iommu_map_table *tbl = &iommu->tbl;
 		unsigned long shift = IO_PAGE_SHIFT;
 
 		if (!len)
 			break;
 		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
-		entry = ((dma_handle - tbl->page_table_map_base) >> shift);
+		entry = ((dma_handle - tbl->table_map_base) >> shift);
 		dma_4v_iommu_demap(&devhandle, entry, npages);
 		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
-				     false, NULL);
+				     DMA_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
@@ -517,8 +512,6 @@ static struct dma_map_ops sun4v_dma_ops = {
 	.unmap_sg			= dma_4v_unmap_sg,
 };
 
-static struct iommu_tbl_ops dma_4v_iommu_ops;
-
 static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
 {
 	struct property *prop;
@@ -533,7 +526,7 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
 }
 
 static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
-					    struct iommu_table *iommu)
+					    struct iommu_map_table *iommu)
 {
 	struct iommu_pool *pool;
 	unsigned long i, pool_nr, cnt = 0;
@@ -541,7 +534,7 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
 
 	devhandle = pbm->devhandle;
 	for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
-		pool = &(iommu->arena_pool[pool_nr]);
+		pool = &(iommu->pools[pool_nr]);
 		for (i = pool->start; i <= pool->end; i++) {
 			unsigned long ret, io_attrs, ra;
 
@@ -587,8 +580,9 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 	dma_offset = vdma[0];
 
 	/* Setup initial software IOMMU state. */
+	spin_lock_init(&iommu->lock);
 	iommu->ctx_lowest_free = 1;
-	iommu->tbl.page_table_map_base = dma_offset;
+	iommu->tbl.table_map_base = dma_offset;
 	iommu->dma_addr_mask = dma_mask;
 
 	/* Allocate and initialize the free area map.  */
@@ -600,8 +594,9 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 		return -ENOMEM;
 	}
 	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
-			    &dma_4v_iommu_ops, false /* no large_pool */,
-			    0 /* default npools */);
+			    NULL, false /* no large_pool */,
+			    0 /* default npools */,
+			    false /* want span boundary checking */);
 	sz = probe_existing_entries(pbm, &iommu->tbl);
 	if (sz)
 		printk("%s: Imported %lu TSB entries from OBP\n",
@@ -1001,17 +996,8 @@ static struct platform_driver pci_sun4v_driver = {
 	.probe		= pci_sun4v_probe,
 };
 
-static void setup_iommu_pool_hash(void)
-{
-	unsigned int i;
-
-	for_each_possible_cpu(i)
-		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
-}
-
 static int __init pci_sun4v_init(void)
 {
-	setup_iommu_pool_hash();
 	return platform_driver_register(&pci_sun4v_driver);
 }
 
diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h
index 6be5c863f3290cfadc3429e5a8baa0bb0a60a066..bbced83b32ee1c45da17450f34fa16a26ec3092a 100644
--- a/include/linux/iommu-common.h
+++ b/include/linux/iommu-common.h
@@ -15,41 +15,37 @@ struct iommu_pool {
 	spinlock_t	lock;
 };
 
-struct iommu_table;
-
-struct iommu_tbl_ops {
-	unsigned long	(*cookie_to_index)(u64, void *);
-	void		(*demap)(void *, unsigned long, unsigned long);
-	void		(*reset)(struct iommu_table *);
-};
-
-struct iommu_table {
-	unsigned long		page_table_map_base;
-	unsigned long		page_table_shift;
+struct iommu_map_table {
+	unsigned long		table_map_base;	/* DMA addr of map entry 0 */
+	unsigned long		table_shift;	/* (addr - base) >> shift = entry */
 	unsigned long		nr_pools;
-	const struct iommu_tbl_ops  *iommu_tbl_ops;
+	void			(*lazy_flush)(struct iommu_map_table *);
 	unsigned long		poolsize;
-	struct iommu_pool	arena_pool[IOMMU_NR_POOLS];
+	struct iommu_pool	pools[IOMMU_NR_POOLS];
 	u32			flags;
 #define	IOMMU_HAS_LARGE_POOL	0x00000001
+#define	IOMMU_NO_SPAN_BOUND	0x00000002	/* skip span boundary check */
+#define	IOMMU_NEED_FLUSH	0x00000004	/* lazy_flush is pending */
 	struct iommu_pool	large_pool;
 	unsigned long		*map;
 };
 
-extern void iommu_tbl_pool_init(struct iommu_table *iommu,
+extern void iommu_tbl_pool_init(struct iommu_map_table *iommu,
 				unsigned long num_entries,
-				u32 page_table_shift,
-				const struct iommu_tbl_ops *iommu_tbl_ops,
-				bool large_pool, u32 npools);
+				u32 table_shift,
+				void (*lazy_flush)(struct iommu_map_table *),
+				bool large_pool, u32 npools,
+				bool skip_span_boundary_check);
 
 extern unsigned long iommu_tbl_range_alloc(struct device *dev,
-					   struct iommu_table *iommu,
+					   struct iommu_map_table *iommu,
 					   unsigned long npages,
 					   unsigned long *handle,
-					   unsigned int pool_hash);
+					   unsigned long mask,
+					   unsigned int align_order);
 
-extern void iommu_tbl_range_free(struct iommu_table *iommu,
+extern void iommu_tbl_range_free(struct iommu_map_table *iommu,
 				 u64 dma_addr, unsigned long npages,
-				 bool do_demap, void *demap_arg);
+				 unsigned long entry);
 
 #endif
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
index fac4f35250c994d7465ace3f2bb1d4f0d91c85ad..a1a517cba7ec3bea6140957d2e549317717dedcf 100644
--- a/lib/iommu-common.c
+++ b/lib/iommu-common.c
@@ -9,37 +9,72 @@
 #include <linux/iommu-helper.h>
 #include <linux/iommu-common.h>
 #include <linux/dma-mapping.h>
+#include <linux/hash.h>
 
 #ifndef	DMA_ERROR_CODE
 #define	DMA_ERROR_CODE (~(dma_addr_t)0x0)
 #endif
 
-#define IOMMU_LARGE_ALLOC	15
+static unsigned long iommu_large_alloc = 15; /* npages > this => large pool */
+
+static	DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+static inline bool need_flush(struct iommu_map_table *iommu)
+{
+	/* A pending flush only counts if there is a callback to run it. */
+	return iommu->lazy_flush && (iommu->flags & IOMMU_NEED_FLUSH);
+}
+
+static inline void set_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags |= IOMMU_NEED_FLUSH;	/* tested by need_flush() */
+}
+
+static inline void clear_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags &= ~IOMMU_NEED_FLUSH;	/* flush no longer pending */
+}
+
+static void setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+	static bool do_once;	/* true after the first call */
+
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)	/* one hash value per possible CPU */
+		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
 
 /*
- * Initialize iommu_pool entries for the iommu_table. `num_entries'
+ * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
  * is the number of table entries. If `large_pool' is set to true,
  * the top 1/4 of the table will be set aside for pool allocations
- * of more than IOMMU_LARGE_ALLOC pages.
+ * of more than iommu_large_alloc pages.
  */
-extern void iommu_tbl_pool_init(struct iommu_table *iommu,
+extern void iommu_tbl_pool_init(struct iommu_map_table *iommu,
 				unsigned long num_entries,
-				u32 page_table_shift,
-				const struct iommu_tbl_ops *iommu_tbl_ops,
-				bool large_pool, u32 npools)
+				u32 table_shift,
+				void (*lazy_flush)(struct iommu_map_table *),
+				bool large_pool, u32 npools,
+				bool skip_span_boundary_check)
 {
 	unsigned int start, i;
 	struct iommu_pool *p = &(iommu->large_pool);
 
+	setup_iommu_pool_hash();
 	if (npools == 0)
 		iommu->nr_pools = IOMMU_NR_POOLS;
 	else
 		iommu->nr_pools = npools;
 	BUG_ON(npools > IOMMU_NR_POOLS);
 
-	iommu->page_table_shift = page_table_shift;
-	iommu->iommu_tbl_ops = iommu_tbl_ops;
+	iommu->table_shift = table_shift;
+	iommu->lazy_flush = lazy_flush;
 	start = 0;
+	if (skip_span_boundary_check)
+		iommu->flags |= IOMMU_NO_SPAN_BOUND;
 	if (large_pool)
 		iommu->flags |= IOMMU_HAS_LARGE_POOL;
 
@@ -48,11 +83,11 @@ extern void iommu_tbl_pool_init(struct iommu_table *iommu,
 	else
 		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
 	for (i = 0; i < iommu->nr_pools; i++) {
-		spin_lock_init(&(iommu->arena_pool[i].lock));
-		iommu->arena_pool[i].start = start;
-		iommu->arena_pool[i].hint = start;
+		spin_lock_init(&(iommu->pools[i].lock));
+		iommu->pools[i].start = start;
+		iommu->pools[i].hint = start;
 		start += iommu->poolsize; /* start for next pool */
-		iommu->arena_pool[i].end = start - 1;
+		iommu->pools[i].end = start - 1;
 	}
 	if (!large_pool)
 		return;
@@ -65,121 +100,136 @@ extern void iommu_tbl_pool_init(struct iommu_table *iommu,
 EXPORT_SYMBOL(iommu_tbl_pool_init);
 
 unsigned long iommu_tbl_range_alloc(struct device *dev,
-				struct iommu_table *iommu,
+				struct iommu_map_table *iommu,
 				unsigned long npages,
 				unsigned long *handle,
-				unsigned int pool_hash)
+				unsigned long mask,
+				unsigned int align_order)
 {
+	unsigned int pool_hash = __this_cpu_read(iommu_pool_hash);
 	unsigned long n, end, start, limit, boundary_size;
-	struct iommu_pool *arena;
+	struct iommu_pool *pool;
 	int pass = 0;
 	unsigned int pool_nr;
 	unsigned int npools = iommu->nr_pools;
 	unsigned long flags;
 	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
-	bool largealloc = (large_pool && npages > IOMMU_LARGE_ALLOC);
+	bool largealloc = (large_pool && npages > iommu_large_alloc);
 	unsigned long shift;
+	unsigned long align_mask = 0;
+
+	if (align_order > 0)
+		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
 
 	/* Sanity check */
 	if (unlikely(npages == 0)) {
-		printk_ratelimited("npages == 0\n");
+		WARN_ON_ONCE(1);
 		return DMA_ERROR_CODE;
 	}
 
 	if (largealloc) {
-		arena = &(iommu->large_pool);
-		spin_lock_irqsave(&arena->lock, flags);
+		pool = &(iommu->large_pool);
 		pool_nr = 0; /* to keep compiler happy */
 	} else {
 		/* pick out pool_nr */
 		pool_nr =  pool_hash & (npools - 1);
-		arena = &(iommu->arena_pool[pool_nr]);
-
-		/* find first available unlocked pool */
-		while (!spin_trylock_irqsave(&(arena->lock), flags)) {
-			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
-			arena = &(iommu->arena_pool[pool_nr]);
-		}
+		pool = &(iommu->pools[pool_nr]);
 	}
+	spin_lock_irqsave(&pool->lock, flags);
 
  again:
 	if (pass == 0 && handle && *handle &&
-	    (*handle >= arena->start) && (*handle < arena->end))
+	    (*handle >= pool->start) && (*handle < pool->end))
 		start = *handle;
 	else
-		start = arena->hint;
+		start = pool->hint;
 
-	limit = arena->end;
+	limit = pool->end;
 
 	/* The case below can happen if we have a small segment appended
 	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the beginning and flush.
+	 * the available space. If so, go back to the beginning. If a
+	 * flush is needed it is done after iommu_area_alloc() below, and
+	 * only when a lazy_flush callback was registered (it may be NULL).
 	 */
-	if (start >= limit) {
-		start = arena->start;
-		if (iommu->iommu_tbl_ops->reset != NULL)
-			iommu->iommu_tbl_ops->reset(iommu);
+	if (start >= limit)
+		start = pool->start;
+	shift = iommu->table_map_base >> iommu->table_shift;
+	if (limit + shift > mask) {
+		limit = mask - shift + 1;
+		/* If we're constrained on address range, first try
+		 * at the masked hint to avoid O(n) search complexity,
+		 * but on second pass, start at 0 in pool 0.
+		 */
+		if ((start & mask) >= limit || pass > 0) {
+			spin_unlock(&(pool->lock));
+			pool = &(iommu->pools[0]);
+			spin_lock(&(pool->lock));
+			start = pool->start;
+		} else {
+			start &= mask;
+		}
 	}
 
 	if (dev)
 		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << iommu->page_table_shift);
+				      1 << iommu->table_shift);
 	else
-		boundary_size = ALIGN(1ULL << 32, 1 << iommu->page_table_shift);
+		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
 
-	shift = iommu->page_table_map_base >> iommu->page_table_shift;
-	boundary_size = boundary_size >> iommu->page_table_shift;
+	boundary_size = boundary_size >> iommu->table_shift;
 	/*
-	 * if the iommu has a non-trivial cookie <-> index mapping, we set
+	 * if skip_span_boundary_check was set during init, we set
 	 * things up so that iommu_is_span_boundary() merely checks if the
 	 * (index + npages) < num_tsb_entries
 	 */
-	if (iommu->iommu_tbl_ops->cookie_to_index != NULL) {
+	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
 		shift = 0;
 		boundary_size = iommu->poolsize * iommu->nr_pools;
 	}
 	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
-			     boundary_size, 0);
+			     boundary_size, align_mask);
 	if (n == -1) {
 		if (likely(pass == 0)) {
 			/* First failure, rescan from the beginning.  */
-			arena->hint = arena->start;
-			if (iommu->iommu_tbl_ops->reset != NULL)
-				iommu->iommu_tbl_ops->reset(iommu);
+			pool->hint = pool->start;
+			set_flush(iommu);
 			pass++;
 			goto again;
 		} else if (!largealloc && pass <= iommu->nr_pools) {
-			spin_unlock(&(arena->lock));
+			spin_unlock(&(pool->lock));
 			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
-			arena = &(iommu->arena_pool[pool_nr]);
-			while (!spin_trylock(&(arena->lock))) {
-				pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
-				arena = &(iommu->arena_pool[pool_nr]);
-			}
-			arena->hint = arena->start;
+			pool = &(iommu->pools[pool_nr]);
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			set_flush(iommu);
 			pass++;
 			goto again;
 		} else {
 			/* give up */
-			spin_unlock_irqrestore(&(arena->lock), flags);
-			return DMA_ERROR_CODE;
+			n = DMA_ERROR_CODE;
+			goto bail;
 		}
 	}
+	if (iommu->lazy_flush && (n < pool->hint || need_flush(iommu))) {
+		clear_flush(iommu);
+		iommu->lazy_flush(iommu);
+	}
 
 	end = n + npages;
-
-	arena->hint = end;
+	pool->hint = end;
 
 	/* Update handle for SG allocations */
 	if (handle)
 		*handle = end;
-	spin_unlock_irqrestore(&(arena->lock), flags);
+bail:
+	spin_unlock_irqrestore(&(pool->lock), flags);
 
 	return n;
 }
 EXPORT_SYMBOL(iommu_tbl_range_alloc);
 
-static struct iommu_pool *get_pool(struct iommu_table *tbl,
+static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
 				   unsigned long entry)
 {
 	struct iommu_pool *p;
@@ -193,31 +243,27 @@ static struct iommu_pool *get_pool(struct iommu_table *tbl,
 		unsigned int pool_nr = entry / tbl->poolsize;
 
 		BUG_ON(pool_nr >= tbl->nr_pools);
-		p = &tbl->arena_pool[pool_nr];
+		p = &tbl->pools[pool_nr];
 	}
 	return p;
 }
 
-void iommu_tbl_range_free(struct iommu_table *iommu, u64 dma_addr,
-			  unsigned long npages, bool do_demap, void *demap_arg)
+/* Clear npages map bits. Pass DMA_ERROR_CODE as `entry' to derive the
+ * index from dma_addr via the default addr->entry mapping below;
+ * any other value is used directly as the index into the map table.
+ */
+void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
+			  unsigned long npages, unsigned long entry)
 {
-	unsigned long entry;
 	struct iommu_pool *pool;
 	unsigned long flags;
-	unsigned long shift = iommu->page_table_shift;
+	unsigned long shift = iommu->table_shift;
 
-	if (iommu->iommu_tbl_ops->cookie_to_index != NULL) {
-		entry = (*iommu->iommu_tbl_ops->cookie_to_index)(dma_addr,
-								 demap_arg);
-	} else {
-		entry = (dma_addr - iommu->page_table_map_base) >> shift;
-	}
+	if (entry == (unsigned long)DMA_ERROR_CODE) /* default mapping */
+		entry = (dma_addr - iommu->table_map_base) >> shift;
 	pool = get_pool(iommu, entry);
 
 	spin_lock_irqsave(&(pool->lock), flags);
-	if (do_demap && iommu->iommu_tbl_ops->demap != NULL)
-		(*iommu->iommu_tbl_ops->demap)(demap_arg, entry, npages);
-
 	bitmap_clear(iommu->map, entry, npages);
 	spin_unlock_irqrestore(&(pool->lock), flags);
 }