From bb8646d8340fa7c1b66a037428e39f85f8738f0a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sat, 18 Mar 2006 23:55:11 -0800
Subject: [PATCH] [SPARC64]: Optimized TSB table initialization.

We only need to write an invalid tag every 16 bytes,
so taking advantage of this can save many instructions
compared to the simple memset() call we make now.

A prefetching implementation is implemented for sun4u
and a block-init store version if implemented for Niagara.

The next trick is to be able to perform an init and
a copy_tsb() in parallel when growing a TSB table.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/tsb.S  | 69 ++++++++++++++++++++++++++++++++++++++
 arch/sparc64/lib/NGbzero.S |  1 +
 arch/sparc64/mm/tsb.c      |  2 +-
 include/asm-sparc64/mmu.h  |  1 +
 4 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 1b154c863628e..118baea44f699 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -371,3 +371,72 @@ copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
 	retl
 	 TSB_MEMBAR
 	.size		copy_tsb, .-copy_tsb
+
+	/* Set the invalid bit in all TSB entries.  */
+	.align		32
+	.globl		tsb_init
+	.type		tsb_init,#function
+tsb_init:		/* %o0 = TSB vaddr, %o1 = size in bytes */
+	prefetch	[%o0 + 0x000], #n_writes
+	mov		1, %g1
+	prefetch	[%o0 + 0x040], #n_writes
+	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
+	prefetch	[%o0 + 0x080], #n_writes
+1:	prefetch	[%o0 + 0x0c0], #n_writes
+	stx		%g1, [%o0 + 0x00]
+	stx		%g1, [%o0 + 0x10]
+	stx		%g1, [%o0 + 0x20]
+	stx		%g1, [%o0 + 0x30]
+	prefetch	[%o0 + 0x100], #n_writes
+	stx		%g1, [%o0 + 0x40]
+	stx		%g1, [%o0 + 0x50]
+	stx		%g1, [%o0 + 0x60]
+	stx		%g1, [%o0 + 0x70]
+	prefetch	[%o0 + 0x140], #n_writes
+	stx		%g1, [%o0 + 0x80]
+	stx		%g1, [%o0 + 0x90]
+	stx		%g1, [%o0 + 0xa0]
+	stx		%g1, [%o0 + 0xb0]
+	prefetch	[%o0 + 0x180], #n_writes
+	stx		%g1, [%o0 + 0xc0]
+	stx		%g1, [%o0 + 0xd0]
+	stx		%g1, [%o0 + 0xe0]
+	stx		%g1, [%o0 + 0xf0]
+	subcc		%o1, 0x100, %o1
+	bne,pt		%xcc, 1b
+	 add		%o0, 0x100, %o0
+	retl
+	 nop
+	nop
+	nop
+	.size		tsb_init, .-tsb_init
+
+	.globl		NGtsb_init
+	.type		NGtsb_init,#function
+NGtsb_init:
+	rd		%asi, %g2
+	mov		1, %g1
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
+1:	stxa		%g1, [%o0 + 0x00] %asi
+	stxa		%g1, [%o0 + 0x10] %asi
+	stxa		%g1, [%o0 + 0x20] %asi
+	stxa		%g1, [%o0 + 0x30] %asi
+	stxa		%g1, [%o0 + 0x40] %asi
+	stxa		%g1, [%o0 + 0x50] %asi
+	stxa		%g1, [%o0 + 0x60] %asi
+	stxa		%g1, [%o0 + 0x70] %asi
+	stxa		%g1, [%o0 + 0x80] %asi
+	stxa		%g1, [%o0 + 0x90] %asi
+	stxa		%g1, [%o0 + 0xa0] %asi
+	stxa		%g1, [%o0 + 0xb0] %asi
+	stxa		%g1, [%o0 + 0xc0] %asi
+	stxa		%g1, [%o0 + 0xd0] %asi
+	stxa		%g1, [%o0 + 0xe0] %asi
+	stxa		%g1, [%o0 + 0xf0] %asi
+	subcc		%o1, 0x100, %o1
+	bne,pt		%xcc, 1b
+	 add		%o0, 0x100, %o0
+	retl
+	 wr		%g2, 0x0, %asi
+	.size		NGtsb_init, .-NGtsb_init
diff --git a/arch/sparc64/lib/NGbzero.S b/arch/sparc64/lib/NGbzero.S
index fef584f745dc7..e86baece5cc86 100644
--- a/arch/sparc64/lib/NGbzero.S
+++ b/arch/sparc64/lib/NGbzero.S
@@ -157,6 +157,7 @@ niagara_patch_bzero:
 	NG_DO_PATCH(memset, NGmemset)
 	NG_DO_PATCH(__bzero, NGbzero)
 	NG_DO_PATCH(__clear_user, NGclear_user)
+	NG_DO_PATCH(tsb_init, NGtsb_init)
 	retl
 	 nop
 	.size	niagara_patch_bzero,.-niagara_patch_bzero
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index 1af797a0a0924..b2064e2a44d6f 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -313,7 +313,7 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss)
 	}
 
 	/* Mark all tags as invalid.  */
-	memset(new_tsb, 0x40, new_size);
+	tsb_init(new_tsb, new_size);
 
 	/* Ok, we are about to commit the changes.  If we are
 	 * growing an existing TSB the locking is very tricky,
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index da14a9bf0ed61..230ba678d3b04 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -88,6 +88,7 @@ struct tsb {
 
 extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte);
 extern void tsb_flush(unsigned long ent, unsigned long tag);
+extern void tsb_init(struct tsb *tsb, unsigned long size);
 
 typedef struct {
 	spinlock_t		lock;
-- 
GitLab