/*
 * arch/powerpc/sysdev/dart_iommu.c
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
 *                    IBM Corporation
 *
 * Based on pSeries_iommu.c:
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 *
 * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
Johannes Berg's avatar
Johannes Berg committed
38
#include <linux/suspend.h>
Yinghai Lu's avatar
Yinghai Lu committed
39
#include <linux/memblock.h>
40
#include <linux/gfp.h>
41
#include <linux/kmemleak.h>
Linus Torvalds's avatar
Linus Torvalds committed
42 43 44 45 46 47
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
48
#include <asm/ppc-pci.h>
Linus Torvalds's avatar
Linus Torvalds committed
49

David Gibson's avatar
David Gibson committed
50 51
#include "dart.h"

52 53
/* DART table address and size */
static u32 *dart_tablebase;
Linus Torvalds's avatar
Linus Torvalds committed
54 55 56
static unsigned long dart_tablesize;

/* Mapped base address for the dart */
57
static unsigned int __iomem *dart;
Linus Torvalds's avatar
Linus Torvalds committed
58 59 60 61

/* Dummy val that entries are set to when unused */
static unsigned int dart_emptyval;

62 63
static struct iommu_table iommu_table_dart;
static int iommu_table_dart_inited;
Linus Torvalds's avatar
Linus Torvalds committed
64
static int dart_dirty;
65
static int dart_is_u4;
Linus Torvalds's avatar
Linus Torvalds committed
66

67 68
#define DART_U4_BYPASS_BASE	0x8000000000ull

Linus Torvalds's avatar
Linus Torvalds committed
69 70
#define DBG(...)

71 72
static DEFINE_SPINLOCK(invalidate_lock);

Linus Torvalds's avatar
Linus Torvalds committed
73 74 75
static inline void dart_tlb_invalidate_all(void)
{
	unsigned long l = 0;
76
	unsigned int reg, inv_bit;
Linus Torvalds's avatar
Linus Torvalds committed
77
	unsigned long limit;
78 79 80
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
81 82 83 84 85 86 87 88

	DBG("dart: flush\n");

	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
	 * control register and wait for it to clear.
	 *
	 * Gotcha: Sometimes, the DART won't detect that the bit gets
	 * set. If so, clear it and set it again.
89
	 */
Linus Torvalds's avatar
Linus Torvalds committed
90 91 92

	limit = 0;

93
	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
Linus Torvalds's avatar
Linus Torvalds committed
94 95
retry:
	l = 0;
96 97 98 99 100
	reg = DART_IN(DART_CNTL);
	reg |= inv_bit;
	DART_OUT(DART_CNTL, reg);

	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
Linus Torvalds's avatar
Linus Torvalds committed
101
		l++;
102
	if (l == (1L << limit)) {
Linus Torvalds's avatar
Linus Torvalds committed
103 104
		if (limit < 4) {
			limit++;
105 106
			reg = DART_IN(DART_CNTL);
			reg &= ~inv_bit;
107
			DART_OUT(DART_CNTL, reg);
Linus Torvalds's avatar
Linus Torvalds committed
108 109
			goto retry;
		} else
110
			panic("DART: TLB did not flush after waiting a long "
Linus Torvalds's avatar
Linus Torvalds committed
111 112
			      "time. Buggy U3 ?");
	}
113 114

	spin_unlock_irqrestore(&invalidate_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
115 116
}

117 118 119 120
static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
{
	unsigned int reg;
	unsigned int l, limit;
121 122 123
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144

	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
		(bus_rpn & DART_CNTL_U4_IONE_MASK);
	DART_OUT(DART_CNTL, reg);

	limit = 0;
wait_more:
	l = 0;
	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
		rmb();
		l++;
	}

	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			goto wait_more;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U4 ?");
	}
145 146

	spin_unlock_irqrestore(&invalidate_lock, flags);
147 148
}

149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
/*
 * Push a run of DART table entries out of the CPU data cache.
 *
 * @base:  first table entry that was modified
 * @count: number of entries modified; count + 1 entries are covered
 *
 * The range is first handed to flush_inval_dcache_range(), then a fixed
 * sync/isync/dcbf/load sequence is executed on the address "end".
 */
static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;	/* receives the dummy load below; value unused */

	/* Perform a standard cache flush */
	flush_inval_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 *
	 * Both the dcbf and the lwz operate on the address held in "end".
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}

Linus Torvalds's avatar
Linus Torvalds committed
177 178
static void dart_flush(struct iommu_table *tbl)
{
179
	mb();
180
	if (dart_dirty) {
Linus Torvalds's avatar
Linus Torvalds committed
181
		dart_tlb_invalidate_all();
182 183
		dart_dirty = 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
184 185
}

186
static int dart_build(struct iommu_table *tbl, long index,
Linus Torvalds's avatar
Linus Torvalds committed
187
		       long npages, unsigned long uaddr,
188
		       enum dma_data_direction direction,
189
		       unsigned long attrs)
Linus Torvalds's avatar
Linus Torvalds committed
190
{
191
	unsigned int *dp, *orig_dp;
Linus Torvalds's avatar
Linus Torvalds committed
192
	unsigned int rpn;
193
	long l;
Linus Torvalds's avatar
Linus Torvalds committed
194 195 196

	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);

197
	orig_dp = dp = ((unsigned int*)tbl->it_base) + index;
198

199
	/* On U3, all memory is contiguous, so we can move this
Linus Torvalds's avatar
Linus Torvalds committed
200 201
	 * out of the loop.
	 */
202 203
	l = npages;
	while (l--) {
204
		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;
Linus Torvalds's avatar
Linus Torvalds committed
205 206 207

		*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);

208
		uaddr += DART_PAGE_SIZE;
Linus Torvalds's avatar
Linus Torvalds committed
209
	}
210
	dart_cache_sync(orig_dp, npages);
211

212 213 214 215 216 217 218
	if (dart_is_u4) {
		rpn = index;
		while (npages--)
			dart_tlb_invalidate_one(rpn++);
	} else {
		dart_dirty = 1;
	}
219
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
220 221 222 223 224
}


static void dart_free(struct iommu_table *tbl, long index, long npages)
{
225 226
	unsigned int *dp, *orig_dp;
	long orig_npages = npages;
227

Linus Torvalds's avatar
Linus Torvalds committed
228 229 230 231 232 233 234
	/* We don't worry about flushing the TLB cache. The only drawback of
	 * not doing it is that we won't catch buggy device drivers doing
	 * bad DMAs, but then no 32-bit architecture ever does either.
	 */

	DBG("dart: free at: %lx, %lx\n", index, npages);

235
	orig_dp = dp  = ((unsigned int *)tbl->it_base) + index;
236

Linus Torvalds's avatar
Linus Torvalds committed
237 238 239
	while (npages--)
		*(dp++) = dart_emptyval;

240 241
	dart_cache_sync(orig_dp, orig_npages);
}
Linus Torvalds's avatar
Linus Torvalds committed
242

243
static void allocate_dart(void)
Linus Torvalds's avatar
Linus Torvalds committed
244
{
245
	unsigned long tmp;
Linus Torvalds's avatar
Linus Torvalds committed
246

247 248
	/* 512 pages (2MB) is max DART tablesize. */
	dart_tablesize = 1UL << 21;
249

250 251 252
	/*
	 * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
	 * will blow up an entire large page anyway in the kernel mapping.
Linus Torvalds's avatar
Linus Torvalds committed
253
	 */
254 255 256 257 258
	dart_tablebase = __va(memblock_alloc_base(1UL<<24,
						  1UL<<24, 0x80000000L));

	/* There is no point scanning the DART space for leaks*/
	kmemleak_no_scan((void *)dart_tablebase);
Linus Torvalds's avatar
Linus Torvalds committed
259 260 261 262 263

	/* Allocate a spare page to map all invalid DART pages. We need to do
	 * that to work around what looks like a problem with the HT bridge
	 * prefetching into invalid pages and corrupting data
	 */
Yinghai Lu's avatar
Yinghai Lu committed
264
	tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
265 266
	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
					 DARTMAP_RPNMASK);
Linus Torvalds's avatar
Linus Torvalds committed
267

268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}

/*
 * Bring up the DART hardware described by @dart_node.
 *
 * Returns -ENODEV when the IOMMU is switched off, or when it was not forced
 * on and DRAM ends at or below 1GB; returns 0 once the DART is enabled.
 * Failure to find or map the registers panics.
 *
 * @dart_node is only used to look up the register resource.
 */
static int __init dart_init(struct device_node *dart_node)
{
	struct resource regs;
	unsigned long table_base, table_pages;
	unsigned int idx;

	/* IOMMU disabled by the user? Bail out. */
	if (iommu_is_off)
		return -ENODEV;

	/*
	 * Only use the DART if the machine has more than 1GB of RAM
	 * or if it was requested with iommu=on on the command line.
	 *
	 * 1GB of RAM is picked as the limit because some default devices
	 * (i.e. Airport Extreme) have 30 bit address range limits.
	 */
	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
		return -ENODEV;

	/* Locate and map the DART registers */
	if (of_address_to_resource(dart_node, 0, &regs))
		panic("DART: can't get register base ! ");

	dart = ioremap(regs.start, resource_size(&regs));
	if (!dart)
		panic("DART: Cannot map registers!");

	/* Allocate the DART table and the dummy page */
	allocate_dart();

	/* Point every entry at the dummy page, then push the table to memory */
	for (idx = 0; idx < dart_tablesize / sizeof(u32); idx++)
		dart_tablebase[idx] = dart_emptyval;

	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));

	/*
	 * Program the table base and size, and enable the DART.
	 *
	 * NOTE(review): dart_tablebase is the __va() pointer set up by
	 * allocate_dart(); presumably the register write discards the upper
	 * address bits -- confirm against DART_OUT() in dart.h.
	 */
	table_base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
	table_pages = dart_tablesize >> DART_PAGE_SHIFT;
	if (!dart_is_u4) {
		/* U3: base, size and enable share the control register */
		table_pages &= DART_CNTL_U3_SIZE_MASK;
		DART_OUT(DART_CNTL,
			 DART_CNTL_U3_ENABLE |
			 (table_base << DART_CNTL_U3_BASE_SHIFT) |
			 (table_pages << DART_CNTL_U3_SIZE_SHIFT));
	} else {
		/* U4: dedicated base and size registers */
		table_pages &= DART_SIZE_U4_SIZE_MASK;
		DART_OUT(DART_BASE_U4, table_base);
		DART_OUT(DART_SIZE_U4, table_pages);
		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
	}

	/* Invalidate the DART to get rid of possibly stale TLB entries */
	dart_tlb_invalidate_all();

	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
	       dart_is_u4 ? "U4" : "U3");

	return 0;
}

336 337 338 339 340 341
/* Table callbacks installed into iommu_table_dart by iommu_table_dart_setup() */
static struct iommu_table_ops iommu_dart_ops = {
	.flush = dart_flush,	/* deferred full TLB invalidate */
	.clear = dart_free,	/* unmap a run of entries */
	.set = dart_build,	/* map a run of entries */
};

342
static void iommu_table_dart_setup(void)
Linus Torvalds's avatar
Linus Torvalds committed
343
{
344 345
	iommu_table_dart.it_busno = 0;
	iommu_table_dart.it_offset = 0;
Linus Torvalds's avatar
Linus Torvalds committed
346
	/* it_size is in number of entries */
347
	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
348
	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;
Linus Torvalds's avatar
Linus Torvalds committed
349 350

	/* Initialize the common IOMMU code */
351
	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
352 353
	iommu_table_dart.it_index = 0;
	iommu_table_dart.it_blocksize = 1;
354
	iommu_table_dart.it_ops = &iommu_dart_ops;
355
	iommu_init_table(&iommu_table_dart, -1);
Linus Torvalds's avatar
Linus Torvalds committed
356 357 358 359

	/* Reserve the last page of the DART to avoid possible prefetch
	 * past the DART mapped area
	 */
360
	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
Linus Torvalds's avatar
Linus Torvalds committed
361 362
}

363 364
static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
365 366 367
	if (dart_is_u4)
		set_dma_offset(&dev->dev, DART_U4_BYPASS_BASE);
	set_iommu_table_base(&dev->dev, &iommu_table_dart);
Linus Torvalds's avatar
Linus Torvalds committed
368 369
}

370
static void pci_dma_bus_setup_dart(struct pci_bus *bus)
Linus Torvalds's avatar
Linus Torvalds committed
371
{
372 373 374
	if (!iommu_table_dart_inited) {
		iommu_table_dart_inited = 1;
		iommu_table_dart_setup();
Linus Torvalds's avatar
Linus Torvalds committed
375 376 377
	}
}

378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
/*
 * Report whether @dev's device-tree node, or any of its ancestors, is
 * compatible with "U4-pcie" or "u4-pcie".
 *
 * The walk starts with a reference on dev->of_node and moves upwards with
 * of_get_next_parent(); the reference on a matching node is dropped before
 * returning true.
 */
static bool dart_device_on_pcie(struct device *dev)
{
	struct device_node *node;

	for (node = of_node_get(dev->of_node); node;
	     node = of_get_next_parent(node)) {
		if (of_device_is_compatible(node, "U4-pcie") ||
		    of_device_is_compatible(node, "u4-pcie")) {
			of_node_put(node);
			return true;
		}
	}

	return false;
}

/*
 * dma_set_mask hook installed on U4.
 *
 * Returns -EIO if @dev has no dma_mask pointer or @dma_mask is not
 * supported; otherwise selects the DMA ops, stores the mask and returns 0.
 */
static int dart_dma_set_mask(struct device *dev, u64 dma_mask)
{
	bool bypass;

	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
		return -EIO;

	/*
	 * U4 supports a DART bypass, which is used for 64-bit capable
	 * devices to improve performance.  However, that only works for
	 * devices connected to U4's own PCIe interface, not bridged
	 * through hypertransport.  The device must support at least
	 * 40 bits of addresses.
	 */
	bypass = dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40);

	if (bypass) {
		dev_info(dev, "Using 64-bit DMA iommu bypass\n");
		set_dma_ops(dev, &dma_nommu_ops);
	} else {
		dev_info(dev, "Using 32-bit DMA via iommu\n");
		set_dma_ops(dev, &dma_iommu_ops);
	}

	*dev->dma_mask = dma_mask;
	return 0;
}

416
/*
 * Early IOMMU setup: find the DART in the device tree, initialize it and
 * install the DART DMA hooks into @controller_ops.
 *
 * A "u3-dart" node is looked up first, then "u4-dart"; finding the latter
 * sets dart_is_u4.  If neither exists nothing is changed.  If dart_init()
 * fails, the controller hooks are cleared and the PCI DMA ops are set to
 * dma_nommu_ops.
 */
void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
{
	struct device_node *dn;
	int rc;

	/* Find the DART in the device-tree */
	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
	if (dn == NULL) {
		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
		if (dn == NULL)
			return;	/* use default direct_dma_ops */
		dart_is_u4 = 1;
	}

	/* Initialize the DART HW */
	rc = dart_init(dn);

	/*
	 * of_find_compatible_node() returned the node with a reference
	 * held.  dart_init() only reads the register resource from it, so
	 * the reference is dropped here instead of being leaked.
	 */
	of_node_put(dn);

	if (rc != 0)
		goto bail;

	/* Setup bypass if supported */
	if (dart_is_u4)
		ppc_md.dma_set_mask = dart_dma_set_mask;

	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_iommu_ops);
	return;

 bail:
	/* If init failed, use direct iommu and null setup functions */
	controller_ops->dma_dev_setup = NULL;
	controller_ops->dma_bus_setup = NULL;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_nommu_ops);
}

Johannes Berg's avatar
Johannes Berg committed
453 454 455
#ifdef CONFIG_PM
static void iommu_dart_restore(void)
{
456
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
Johannes Berg's avatar
Johannes Berg committed
457 458 459 460 461 462 463 464 465 466 467 468 469 470
	dart_tlb_invalidate_all();
}

/*
 * Late init: install the restore hook, but only if a DART table was
 * actually allocated.  Always returns 0.
 */
static int __init iommu_init_late_dart(void)
{
	if (dart_tablebase)
		ppc_md.iommu_restore = iommu_dart_restore;

	return 0;
}

late_initcall(iommu_init_late_dart);
471
#endif /* CONFIG_PM */