/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 * 
 * Rewrite, cleanup, new allocation schemes, virtual merging: 
 * Copyright (C) 2004 Olof Johansson, IBM Corporation
 *               and  Ben. Herrenschmidt, IBM Corporation
 *
 * Dynamic DMA mapping support, bus-independent parts.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */


#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
#include <linux/hash.h>
#include <linux/fault-inject.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/sched.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/kdump.h>
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>

#define DBG(...)

static int novmerge;

static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);

static int __init setup_iommu(char *str)
{
	if (!strcmp(str, "novmerge"))
		novmerge = 1;
	else if (!strcmp(str, "vmerge"))
		novmerge = 0;
	return 1;
}

__setup("iommu=", setup_iommu);

static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);

/*
 * We precalculate the hash to avoid doing it on every allocation.
 *
 * The hash is important to spread CPUs across all the pools. For example,
 * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
 * with 4 pools all primary threads would map to the same pool.
 */
static int __init setup_iommu_pool_hash(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);

	return 0;
}
subsys_initcall(setup_iommu_pool_hash);
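
/*
 * Illustrative sketch (mirroring iommu_range_alloc() below): the
 * precalculated hash is combined with the table's pool count to pick
 * a pool, e.g.
 *
 *	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
 *	pool = &tbl->pools[pool_nr];
 *
 * Using hash_32() rather than the raw CPU number avoids pathological
 * patterns such as all primary SMT threads (CPU ids 0, 4, 8, ... on
 * 4-way SMT) mapping to the same pool.
 */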

#ifdef CONFIG_FAIL_IOMMU

static DECLARE_FAULT_ATTR(fail_iommu);

static int __init setup_fail_iommu(char *str)
{
	return setup_fault_attr(&fail_iommu, str);
}
__setup("fail_iommu=", setup_fail_iommu);

static bool should_fail_iommu(struct device *dev)
{
	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
}

static int __init fail_iommu_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
						       NULL, &fail_iommu);

	return PTR_ERR_OR_ZERO(dir);
}
late_initcall(fail_iommu_debugfs);

static ssize_t fail_iommu_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
}

static ssize_t fail_iommu_store(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev->archdata.fail_iommu = (i == 0) ? 0 : 1;

	return count;
}

static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
		   fail_iommu_store);

static int fail_iommu_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (device_create_file(dev, &dev_attr_fail_iommu))
			pr_warn("Unable to create IOMMU fault injection sysfs "
				"entries\n");
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		device_remove_file(dev, &dev_attr_fail_iommu);
	}

	return 0;
}

static struct notifier_block fail_iommu_bus_notifier = {
	.notifier_call = fail_iommu_bus_notify
};

static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
#endif

	return 0;
}
/*
 * Must execute after PCI and VIO subsystems have initialised but before
 * devices are probed.
 */
arch_initcall(fail_iommu_setup);
#else
static inline bool should_fail_iommu(struct device *dev)
{
	return false;
}
#endif
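
/*
 * Usage note (illustrative, not a new interface): fault injection is driven
 * by the generic fault-attr framework, so it is typically enabled with the
 * boot parameter
 *
 *	fail_iommu=<interval>,<probability>,<space>,<times>
 *
 * and then armed per device through the sysfs attribute created above,
 * e.g. (hypothetical device path):
 *
 *	echo 1 > /sys/bus/pci/devices/0000:00:01.0/fail_iommu
 *
 * See Documentation/fault-injection/ for the parameter semantics.
 */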

static unsigned long iommu_range_alloc(struct device *dev,
				       struct iommu_table *tbl,
				       unsigned long npages,
				       unsigned long *handle,
				       unsigned long mask,
				       unsigned int align_order)
{
	unsigned long n, end, start;
	unsigned long limit;
	int largealloc = npages > 15;
	int pass = 0;
	unsigned long align_mask;
	unsigned long boundary_size;
	unsigned long flags;
	unsigned int pool_nr;
	struct iommu_pool *pool;

	align_mask = (1ull << align_order) - 1;

	/* This allocator was derived from x86_64's bit string search */

	/* Sanity check */
	if (unlikely(npages == 0)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return IOMMU_MAPPING_ERROR;
	}

	if (should_fail_iommu(dev))
		return IOMMU_MAPPING_ERROR;

	/*
	 * We don't need to disable preemption here because any CPU can
	 * safely use any IOMMU pool.
	 */
	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);

	if (largealloc)
		pool = &(tbl->large_pool);
	else
		pool = &(tbl->pools[pool_nr]);

	spin_lock_irqsave(&(pool->lock), flags);

again:
	if ((pass == 0) && handle && *handle &&
	    (*handle >= pool->start) && (*handle < pool->end))
		start = *handle;
	else
		start = pool->hint;

	limit = pool->end;

	/* The case below can happen if we have a small segment appended
	 * to a large, or when the previous alloc was at the very end of
	 * the available space. If so, go back to the initial start.
	 */
	if (start >= limit)
		start = pool->start;

	if (limit + tbl->it_offset > mask) {
		limit = mask - tbl->it_offset + 1;
		/* If we're constrained on address range, first try
		 * at the masked hint to avoid O(n) search complexity,
		 * but on second pass, start at 0 in pool 0.
		 */
		if ((start & mask) >= limit || pass > 0) {
			spin_unlock(&(pool->lock));
			pool = &(tbl->pools[0]);
			spin_lock(&(pool->lock));
			start = pool->start;
		} else {
			start &= mask;
		}
	}

	if (dev)
		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				      1 << tbl->it_page_shift);
	else
		boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */

	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
			     boundary_size >> tbl->it_page_shift, align_mask);
	if (n == -1) {
		if (likely(pass == 0)) {
			/* First try the pool from the start */
			pool->hint = pool->start;
			pass++;
			goto again;

		} else if (pass <= tbl->nr_pools) {
			/* Now try scanning all the other pools */
			spin_unlock(&(pool->lock));
			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
			pool = &tbl->pools[pool_nr];
			spin_lock(&(pool->lock));
			pool->hint = pool->start;
			pass++;
			goto again;

		} else {
			/* Give up */
			spin_unlock_irqrestore(&(pool->lock), flags);
			return IOMMU_MAPPING_ERROR;
		}
	}

	end = n + npages;

	/* Bump the hint to a new block for small allocs. */
	if (largealloc) {
		/* Don't bump to new block to avoid fragmentation */
		pool->hint = end;
	} else {
		/* Overflow will be taken care of at the next allocation */
		pool->hint = (end + tbl->it_blocksize - 1) &
				~(tbl->it_blocksize - 1);
	}

	/* Update handle for SG allocations */
	if (handle)
		*handle = end;

	spin_unlock_irqrestore(&(pool->lock), flags);

	return n;
}

static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
			      void *page, unsigned int npages,
			      enum dma_data_direction direction,
			      unsigned long mask, unsigned int align_order,
			      unsigned long attrs)
{
	unsigned long entry;
	dma_addr_t ret = IOMMU_MAPPING_ERROR;
	int build_fail;

	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);

	if (unlikely(entry == IOMMU_MAPPING_ERROR))
		return IOMMU_MAPPING_ERROR;

	entry += tbl->it_offset;	/* Offset into real TCE table */
	ret = entry << tbl->it_page_shift;	/* Set the return dma address */

	/* Put the TCEs in the HW table */
	build_fail = tbl->it_ops->set(tbl, entry, npages,
				      (unsigned long)page &
				      IOMMU_PAGE_MASK(tbl), direction, attrs);

	/* tbl->it_ops->set() only returns non-zero for transient errors.
	 * Clean up the table bitmap in this case and return
	 * IOMMU_MAPPING_ERROR. For all other errors the functionality is
	 * not altered.
	 */
	if (unlikely(build_fail)) {
		__iommu_free(tbl, ret, npages);
		return IOMMU_MAPPING_ERROR;
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();

	return ret;
}

static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
			     unsigned int npages)
{
	unsigned long entry, free_entry;

	entry = dma_addr >> tbl->it_page_shift;
	free_entry = entry - tbl->it_offset;

	if (((free_entry + npages) > tbl->it_size) ||
	    (entry < tbl->it_offset)) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "iommu_free: invalid entry\n");
			printk(KERN_INFO "\tentry     = 0x%lx\n", entry);
			printk(KERN_INFO "\tdma_addr  = 0x%llx\n", (u64)dma_addr);
			printk(KERN_INFO "\tTable     = 0x%llx\n", (u64)tbl);
			printk(KERN_INFO "\tbus#      = 0x%llx\n", (u64)tbl->it_busno);
			printk(KERN_INFO "\tsize      = 0x%llx\n", (u64)tbl->it_size);
			printk(KERN_INFO "\tstartOff  = 0x%llx\n", (u64)tbl->it_offset);
			printk(KERN_INFO "\tindex     = 0x%llx\n", (u64)tbl->it_index);
			WARN_ON(1);
		}

		return false;
	}

	return true;
}

static struct iommu_pool *get_pool(struct iommu_table *tbl,
				   unsigned long entry)
{
	struct iommu_pool *p;
	unsigned long largepool_start = tbl->large_pool.start;

	/* The large pool is the last pool at the top of the table */
	if (entry >= largepool_start) {
		p = &tbl->large_pool;
	} else {
		unsigned int pool_nr = entry / tbl->poolsize;

		BUG_ON(pool_nr > tbl->nr_pools);
		p = &tbl->pools[pool_nr];
	}

	return p;
}

static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
			 unsigned int npages)
{
	unsigned long entry, free_entry;
	unsigned long flags;
	struct iommu_pool *pool;

	entry = dma_addr >> tbl->it_page_shift;
	free_entry = entry - tbl->it_offset;

	pool = get_pool(tbl, free_entry);

	if (!iommu_free_check(tbl, dma_addr, npages))
		return;

	tbl->it_ops->clear(tbl, entry, npages);

	spin_lock_irqsave(&(pool->lock), flags);
	bitmap_clear(tbl->it_map, free_entry, npages);
	spin_unlock_irqrestore(&(pool->lock), flags);
}

static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
		unsigned int npages)
{
	__iommu_free(tbl, dma_addr, npages);

	/* Make sure TLB cache is flushed if the HW needs it. We do
	 * not do an mb() here on purpose, it is not needed on any of
	 * the current platforms.
	 */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);
}

int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
		     struct scatterlist *sglist, int nelems,
		     unsigned long mask, enum dma_data_direction direction,
		     unsigned long attrs)
{
	dma_addr_t dma_next = 0, dma_addr;
	struct scatterlist *s, *outs, *segstart;
	int outcount, incount, i, build_fail = 0;
	unsigned int align;
	unsigned long handle;
	unsigned int max_seg_size;

	BUG_ON(direction == DMA_NONE);

	if ((nelems == 0) || !tbl)
		return 0;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	DBG("sg mapping %d elements:\n", nelems);

	max_seg_size = dma_get_max_seg_size(dev);
	for_each_sg(sglist, s, nelems, i) {
		unsigned long vaddr, npages, entry, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		vaddr = (unsigned long) sg_virt(s);
		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
		align = 0;
		if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
		    (vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - tbl->it_page_shift;
		entry = iommu_range_alloc(dev, tbl, npages, &handle,
					  mask >> tbl->it_page_shift, align);

		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);

		/* Handle failure */
		if (unlikely(entry == IOMMU_MAPPING_ERROR)) {
			if (!(attrs & DMA_ATTR_NO_WARN) &&
			    printk_ratelimit())
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %lx npages %lu\n", tbl, vaddr,
					 npages);
			goto failure;
		}

		/* Convert entry to a dma_addr_t */
		entry += tbl->it_offset;
		dma_addr = entry << tbl->it_page_shift;
		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));

		DBG("  - %lu pages, entry: %lx, dma_addr: %lx\n",
			    npages, entry, dma_addr);

		/* Insert into HW table */
		build_fail = tbl->it_ops->set(tbl, entry, npages,
					      vaddr & IOMMU_PAGE_MASK(tbl),
					      direction, attrs);
		if (unlikely(build_fail))
			goto failure;

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			DBG("  - trying merge...\n");
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if (novmerge || (dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size)) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
				DBG("    can't merge, new segment.\n");
			} else {
				outs->dma_length += s->length;
				DBG("    merged, new len: %ux\n", outs->dma_length);
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			DBG("  - filling new segment.\n");
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;

		DBG("  - dma next is: %lx\n", dma_next);
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	DBG("mapped %d elements:\n", outcount);

	/* For the sake of ppc_iommu_unmap_sg, we clear out the length in the
	 * next entry of the sglist if we didn't fill the list completely
	 */
	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = IOMMU_MAPPING_ERROR;
		outs->dma_length = 0;
	}

	/* Make sure updates are seen by hardware */
	mb();

	return outcount;

 failure:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IOMMU_PAGE_SIZE(tbl));
			__iommu_free(tbl, vaddr, npages);
			s->dma_address = IOMMU_MAPPING_ERROR;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	return 0;
}


void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
			int nelems, enum dma_data_direction direction,
			unsigned long attrs)
{
	struct scatterlist *sg;

	BUG_ON(direction == DMA_NONE);

	if (!tbl)
		return;

	sg = sglist;
	while (nelems--) {
		unsigned int npages;
		dma_addr_t dma_handle = sg->dma_address;

		if (sg->dma_length == 0)
			break;
		npages = iommu_num_pages(dma_handle, sg->dma_length,
					 IOMMU_PAGE_SIZE(tbl));
		__iommu_free(tbl, dma_handle, npages);
		sg = sg_next(sg);
	}

	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
	 * do not do an mb() here, the affected platforms do not need it
	 * when freeing.
	 */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);
}

static void iommu_table_clear(struct iommu_table *tbl)
{
	/*
	 * In case of firmware assisted dump, the system goes through a clean
	 * reboot process at the time of system crash. Hence it's safe to
	 * clear the TCE entries if firmware assisted dump is active.
	 */
	if (!is_kdump_kernel() || is_fadump_active()) {
		/* Clear the table in case firmware left allocations in it */
		tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
		return;
	}

#ifdef CONFIG_CRASH_DUMP
	if (tbl->it_ops->get) {
		unsigned long index, tceval, tcecount = 0;

		/* Reserve the existing mappings left by the first kernel. */
		for (index = 0; index < tbl->it_size; index++) {
			tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
			/*
			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
			 */
			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
				__set_bit(index, tbl->it_map);
				tcecount++;
			}
		}

		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
			printk(KERN_WARNING "TCE table is full; freeing ");
			printk(KERN_WARNING "%d entries for the kdump boot\n",
				KDUMP_MIN_TCE_ENTRIES);
			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
				index < tbl->it_size; index++)
				__clear_bit(index, tbl->it_map);
		}
	}
#endif
}

/*
 * Build an iommu_table structure.  This contains a bit map which
 * is used to manage allocation of the tce space.
 */
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
{
	unsigned long sz;
	static int welcomed = 0;
	struct page *page;
	unsigned int i;
	struct iommu_pool *p;

	BUG_ON(!tbl->it_ops);

	/* number of bytes needed for the bitmap */
	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
	if (!page)
		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
	tbl->it_map = page_address(page);
	memset(tbl->it_map, 0, sz);

	/*
	 * Reserve page 0 so it will not be used for any mappings.
	 * This avoids buggy drivers that consider page 0 to be invalid
	 * to crash the machine or even lose data.
	 */
	if (tbl->it_offset == 0)
		set_bit(0, tbl->it_map);

	/* We only split the IOMMU table if we have 1GB or more of space */
	if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
		tbl->nr_pools = IOMMU_NR_POOLS;
	else
		tbl->nr_pools = 1;

	/* We reserve the top 1/4 of the table for large allocations */
	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;

	for (i = 0; i < tbl->nr_pools; i++) {
		p = &tbl->pools[i];
		spin_lock_init(&(p->lock));
		p->start = tbl->poolsize * i;
		p->hint = p->start;
		p->end = p->start + tbl->poolsize;
	}

	p = &tbl->large_pool;
	spin_lock_init(&(p->lock));
	p->start = tbl->poolsize * i;
	p->hint = p->start;
	p->end = tbl->it_size;

	iommu_table_clear(tbl);

	if (!welcomed) {
		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
		       novmerge ? "disabled" : "enabled");
		welcomed = 1;
	}

	return tbl;
}
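
/*
 * Worked example (assuming IOMMU_NR_POOLS == 4): for a table with
 * it_size = 0x200000 entries and 4K IOMMU pages (>= 1GB of DMA space),
 * iommu_init_table() above carves the bitmap into
 *
 *	pools[0..3]: four pools of it_size * 3/4 / 4 = 0x60000 entries each
 *	large_pool:  the top quarter, entries 0x180000 .. 0x1fffff
 *
 * Small allocations use the per-CPU hashed pools; allocations of more
 * than 15 pages go to the large pool (see iommu_range_alloc()).
 */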

static void iommu_table_free(struct kref *kref)
{
	unsigned long bitmap_sz;
	unsigned int order;
	struct iommu_table *tbl;

	tbl = container_of(kref, struct iommu_table, it_kref);

	if (tbl->it_ops->free)
		tbl->it_ops->free(tbl);

	if (!tbl->it_map) {
		kfree(tbl);
		return;
	}

	/*
	 * In case we have reserved the first bit, we should not emit
	 * the warning below.
	 */
	if (tbl->it_offset == 0)
		clear_bit(0, tbl->it_map);

	/* verify that table contains no entries */
	if (!bitmap_empty(tbl->it_map, tbl->it_size))
		pr_warn("%s: Unexpected TCEs\n", __func__);

	/* calculate bitmap size in bytes */
	bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	/* free bitmap */
	order = get_order(bitmap_sz);
	free_pages((unsigned long) tbl->it_map, order);

	/* free table */
	kfree(tbl);
}

struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
{
	if (kref_get_unless_zero(&tbl->it_kref))
		return tbl;

	return NULL;
}
EXPORT_SYMBOL_GPL(iommu_tce_table_get);

int iommu_tce_table_put(struct iommu_table *tbl)
{
	if (WARN_ON(!tbl))
		return 0;

	return kref_put(&tbl->it_kref, iommu_table_free);
}
EXPORT_SYMBOL_GPL(iommu_tce_table_put);

/* Creates TCEs for a user provided buffer.  The user buffer must be
 * contiguous real kernel storage (not vmalloc).  The address passed here
 * comprises a page address and offset into that page. The dma_addr_t
 * returned will point to the same byte within the page as was passed in.
 */
dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
			  struct page *page, unsigned long offset, size_t size,
			  unsigned long mask, enum dma_data_direction direction,
			  unsigned long attrs)
{
	dma_addr_t dma_handle = IOMMU_MAPPING_ERROR;
	void *vaddr;
	unsigned long uaddr;
	unsigned int npages, align;

	BUG_ON(direction == DMA_NONE);

	vaddr = page_address(page) + offset;
	uaddr = (unsigned long)vaddr;
	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));

	if (tbl) {
		align = 0;
		if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
		    ((unsigned long)vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - tbl->it_page_shift;

		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
					 mask >> tbl->it_page_shift, align,
					 attrs);
		if (dma_handle == IOMMU_MAPPING_ERROR) {
			if (!(attrs & DMA_ATTR_NO_WARN) &&
			    printk_ratelimit())  {
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %p npages %d\n", tbl, vaddr,
					 npages);
			}
		} else
			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
	}

	return dma_handle;
}

void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
		      size_t size, enum dma_data_direction direction,
		      unsigned long attrs)
{
	unsigned int npages;

	BUG_ON(direction == DMA_NONE);

	if (tbl) {
		npages = iommu_num_pages(dma_handle, size,
					 IOMMU_PAGE_SIZE(tbl));
		iommu_free(tbl, dma_handle, npages);
	}
}
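
/*
 * Illustrative pairing (a sketch of how callers such as the dma_iommu_ops
 * wrappers use the two helpers above; the mask and direction values are
 * just examples):
 *
 *	dma_addr_t dma;
 *
 *	dma = iommu_map_page(dev, tbl, page, offset, size,
 *			     dma_get_mask(dev), DMA_TO_DEVICE, 0);
 *	if (dma == IOMMU_MAPPING_ERROR)
 *		return -EIO;
 *	...
 *	iommu_unmap_page(tbl, dma, size, DMA_TO_DEVICE, 0);
 */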

/* Allocates a contiguous real buffer and creates mappings over it.
 * Returns the virtual address of the buffer and sets dma_handle
 * to the dma address (mapping) of the first page.
 */
void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
			   size_t size, dma_addr_t *dma_handle,
			   unsigned long mask, gfp_t flag, int node)
{
	void *ret = NULL;
	dma_addr_t mapping;
	unsigned int order;
	unsigned int nio_pages, io_order;
	struct page *page;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	/*
	 * Client asked for way too much space.  This is checked later
	 * anyway.  It is easier to debug here for the drivers than in
	 * the tce tables.
	 */
	if (order >= IOMAP_MAX_ORDER) {
		dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
			 size);
		return NULL;
	}

	if (!tbl)
		return NULL;

	/* Alloc enough pages (and possibly more) */
	page = alloc_pages_node(node, flag, order);
	if (!page)
		return NULL;
	ret = page_address(page);
	memset(ret, 0, size);

	/* Set up tces to cover the allocated range */
	nio_pages = size >> tbl->it_page_shift;
	io_order = get_iommu_order(size, tbl);
	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
			      mask >> tbl->it_page_shift, io_order, 0);
	if (mapping == IOMMU_MAPPING_ERROR) {
		free_pages((unsigned long)ret, order);
		return NULL;
	}
	*dma_handle = mapping;
	return ret;
}

void iommu_free_coherent(struct iommu_table *tbl, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	if (tbl) {
		unsigned int nio_pages;

		size = PAGE_ALIGN(size);
		nio_pages = size >> tbl->it_page_shift;
		iommu_free(tbl, dma_handle, nio_pages);
		size = PAGE_ALIGN(size);
		free_pages((unsigned long)vaddr, get_order(size));
	}
}
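
/*
 * Note on pairing: iommu_alloc_coherent() and iommu_free_coherent() both
 * PAGE_ALIGN() the size, so a caller is expected to pass the same size to
 * the free routine that it used for the allocation.
 */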

unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
{
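	/*
	 * The TCE permission bits describe the device's access to system
	 * memory: TCE_PCI_READ lets the device read memory (needed for
	 * DMA_TO_DEVICE) and TCE_PCI_WRITE lets it write memory (needed
	 * for DMA_FROM_DEVICE), hence the mapping below.
	 */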
	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return TCE_PCI_READ | TCE_PCI_WRITE;
	case DMA_FROM_DEVICE:
		return TCE_PCI_WRITE;
	case DMA_TO_DEVICE:
		return TCE_PCI_READ;
	default:
		return 0;
	}
}
EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);

#ifdef CONFIG_IOMMU_API
/*
 * SPAPR TCE API
 */
static void group_release(void *iommu_data)
{
	struct iommu_table_group *table_group = iommu_data;

	table_group->group = NULL;
}

void iommu_register_group(struct iommu_table_group *table_group,
		int pci_domain_number, unsigned long pe_num)
{
	struct iommu_group *grp;
	char *name;

	grp = iommu_group_alloc();
	if (IS_ERR(grp)) {
		pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
				PTR_ERR(grp));
		return;
	}
	table_group->group = grp;
	iommu_group_set_iommudata(grp, table_group, group_release);
	name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
			pci_domain_number, pe_num);
	if (!name)
		return;
	iommu_group_set_name(grp, name);
	kfree(name);
}

enum dma_data_direction iommu_tce_direction(unsigned long tce)
{
	if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
		return DMA_BIDIRECTIONAL;
	else if (tce & TCE_PCI_READ)
		return DMA_TO_DEVICE;
	else if (tce & TCE_PCI_WRITE)
		return DMA_FROM_DEVICE;
	else
		return DMA_NONE;
}
EXPORT_SYMBOL_GPL(iommu_tce_direction);

void iommu_flush_tce(struct iommu_table *tbl)
{
	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();
}
EXPORT_SYMBOL_GPL(iommu_flush_tce);

int iommu_tce_check_ioba(unsigned long page_shift,
		unsigned long offset, unsigned long size,
		unsigned long ioba, unsigned long npages)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (ioba & mask)
		return -EINVAL;

	ioba >>= page_shift;
	if (ioba < offset)
		return -EINVAL;

	if ((ioba + 1) > (offset + size))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);

int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (gpa & mask)
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);

long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret;

	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);

	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
			(*direction == DMA_BIDIRECTIONAL)))
		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

	/* if (unlikely(ret))
		pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
			__func__, hwaddr, entry << tbl->it_page_shift,
				hwaddr, ret); */

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);

#ifdef CONFIG_PPC_BOOK3S_64
long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret;

	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);

	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
			(*direction == DMA_BIDIRECTIONAL))) {
		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);

		if (likely(pg)) {
			SetPageDirty(pg);
		} else {
			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
			ret = -EFAULT;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
#endif

int iommu_take_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
	int ret = 0;

	/*
	 * VFIO does not control TCE entries allocation and the guest
	 * can write new TCEs on top of existing ones so iommu_tce_build()
	 * must be able to release old pages. This functionality
	 * requires exchange() callback defined so if it is not
	 * implemented, we disallow taking ownership over the table.
	 */
	if (!tbl->it_ops->exchange)
		return -EINVAL;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);
	for (i = 0; i < tbl->nr_pools; i++)
		spin_lock(&tbl->pools[i].lock);

	if (tbl->it_offset == 0)
		clear_bit(0, tbl->it_map);

	if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
		pr_err("iommu_tce: it_map is not empty");
		ret = -EBUSY;
		/* Restore bit#0 set by iommu_init_table() */
		if (tbl->it_offset == 0)
			set_bit(0, tbl->it_map);
	} else {
		memset(tbl->it_map, 0xff, sz);
	}

	for (i = 0; i < tbl->nr_pools; i++)
		spin_unlock(&tbl->pools[i].lock);
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);
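
/*
 * iommu_take_ownership() above marks the whole it_map as allocated so the
 * kernel's own DMA API allocator stays out of the table while an external
 * user (e.g. VFIO) manages the TCEs; iommu_release_ownership() below
 * clears the bitmap again and restores the reserved bit #0.
 */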

void iommu_release_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);
	for (i = 0; i < tbl->nr_pools; i++)
		spin_lock(&tbl->pools[i].lock);

	memset(tbl->it_map, 0, sz);

	/* Restore bit#0 set by iommu_init_table() */
	if (tbl->it_offset == 0)
		set_bit(0, tbl->it_map);

	for (i = 0; i < tbl->nr_pools; i++)
		spin_unlock(&tbl->pools[i].lock);
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);

int iommu_add_device(struct device *dev)
{
	struct iommu_table *tbl;
	struct iommu_table_group_link *tgl;

	/*
	 * The sysfs entries should be populated before
	 * binding the IOMMU group. If the sysfs entries aren't
	 * ready, we simply bail.
	 */
	if (!device_is_registered(dev))
		return -ENOENT;

	if (dev->iommu_group) {
		pr_debug("%s: Skipping device %s with iommu group %d\n",
			 __func__, dev_name(dev),
			 iommu_group_id(dev->iommu_group));
		return -EBUSY;
	}

	tbl = get_iommu_table_base(dev);
	if (!tbl) {
		pr_debug("%s: Skipping device %s with no tbl\n",
			 __func__, dev_name(dev));
		return 0;
	}

	tgl = list_first_entry_or_null(&tbl->it_group_list,
			struct iommu_table_group_link, next);
	if (!tgl) {
		pr_debug("%s: Skipping device %s with no group\n",
			 __func__, dev_name(dev));
		return 0;
	}
	pr_debug("%s: Adding %s to iommu group %d\n",
		 __func__, dev_name(dev),
		 iommu_group_id(tgl->table_group->group));

	if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
		pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
		       __func__, IOMMU_PAGE_SIZE(tbl),
		       PAGE_SIZE, dev_name(dev));
		return -EINVAL;
	}

	return iommu_group_add_device(tgl->table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);

void iommu_del_device(struct device *dev)
{
	/*
	 * Some devices might not have an IOMMU table and group
	 * and we needn't detach them from the associated
	 * IOMMU groups.
	 */
	if (!dev->iommu_group) {
		pr_debug("iommu_tce: skipping device %s with no tbl\n",
			 dev_name(dev));
		return;
	}

	iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);

static int tce_iommu_bus_notifier(struct notifier_block *nb,
		unsigned long action, void *data)
{
	struct device *dev = data;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		return iommu_add_device(dev);
	case BUS_NOTIFY_DEL_DEVICE:
		if (dev->iommu_group)
			iommu_del_device(dev);
		return 0;
	default:
		return 0;
	}
}

static struct notifier_block tce_iommu_bus_nb = {
	.notifier_call = tce_iommu_bus_notifier,
};

int __init tce_iommu_bus_notifier_init(void)
{
	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
	return 0;
}
#endif /* CONFIG_IOMMU_API */