/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
/*
 * Headers expected to supply the names used below that are not defined in
 * this file: ENTRY/ENDPROC, EFAULT and _ASM_EXTABLE.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT. The
 * destination is not zeroed by this routine; that is left to the wrappers.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	/*
	 * Tag the instruction emitted right after this macro as a read from
	 * the source buffer: local label 10 lands on that instruction and
	 * _ASM_EXTABLE pairs it with .Lbad_source (presumably so that a fault
	 * there resumes at .Lbad_source; _ASM_EXTABLE is defined elsewhere).
	 */
	.macro source
10:
	_ASM_EXTABLE(10b, .Lbad_source)
	.endm

	/*
	 * Tag the instruction emitted right after this macro as a write to
	 * the destination buffer: local label 20 lands on that instruction
	 * and _ASM_EXTABLE pairs it with .Lbad_dest (presumably so that a
	 * fault there resumes at .Lbad_dest; _ASM_EXTABLE is defined
	 * elsewhere).
	 */
	.macro dest
20:
	_ASM_EXTABLE(20b, .Lbad_dest)
	.endm

	/*
	 * Tag the instruction emitted right after this macro so that a fault
	 * on it is paired with label \L instead of an error handler
	 * (presumably execution simply resumes at \L; _ASM_EXTABLE is defined
	 * elsewhere).
	 *
	 * L: label to pair with the tagged instruction, default .Lignore.
	 */
	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


/*
 * csum_partial_copy_generic
 *
 * Copies edx bytes from (%rdi) to (%rsi) and adds the copied data, with
 * end-around carry, into the running sum passed in ecx. The sum is folded
 * to 32 bits and returned in eax.
 *
 * Stack frame (7 quadwords, released at .Lende):
 *   0*8(%rsp)        src_err_ptr (incoming r8)
 *   1*8(%rsp)        dst_err_ptr (incoming r9)
 *   2*8 .. 6*8(%rsp) saved rbx, r12, r14, r13, r15
 *
 * Loads tagged "source" and stores tagged "dest" are paired with
 * .Lbad_source / .Lbad_dest. Those handlers store -EFAULT through the
 * matching error pointer when it is non-NULL and leave through .Lende;
 * eax does not hold a valid sum in that case.
 */
ENTRY(csum_partial_copy_generic)
.Lignore:	/* kept only because the "ignore" macro names it as its default */
	subq  $7*8, %rsp
	movq  %rbx, 2*8(%rsp)
	movq  %r12, 3*8(%rsp)
	movq  %r14, 4*8(%rsp)
	movq  %r13, 5*8(%rsp)
	movq  %r15, 6*8(%rsp)

	/* r8/r9 are reused as temp/zero below, so park the error pointers */
	movq  %r8, (%rsp)
	movq  %r9, 1*8(%rsp)

	movl  %ecx, %eax	/* rax = incoming sum, zero-extended */
	movl  %edx, %ecx	/* rcx = length, zero-extended */

	xorl  %r9d, %r9d	/* r9 = 0, used to add in a pending carry */
	movq  %rcx, %r12

	shrq  $6, %r12		/* r12 = number of whole 64 byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

	/* prefetch five blocks ahead; tagged so it is paired with label 2 */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	/* CF chains through the adcq sequence and across loop iterations */
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	/*
	 * decl does not modify CF, so the pending carry survives. Its ZF is
	 * consumed by the jnz below; the mov/lea in between leave the flags
	 * alone.
	 */
	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax		/* add in the last carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx, %r10d	/* r10d keeps the full length for the tails */
	andl $63, %ecx
	shrl $3, %ecx		/* ecx = remaining whole quadwords */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx		/* sets ZF for jnz, leaves CF untouched */
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx		/* ecx = remaining whole 16bit words */
	jz   .Lhandle_1
	xorl %ebx, %ebx		/* upper bits of ebx stay zero for movw */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx		/* sets ZF for jnz, leaves CF untouched */
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

	/* common exit: restore callee-saved registers and drop the frame */
.Lende:
	movq 2*8(%rsp), %rbx
	movq 3*8(%rsp), %r12
	movq 4*8(%rsp), %r14
	movq 5*8(%rsp), %r13
	movq 6*8(%rsp), %r15
	addq $7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax	/* rax = src_err_ptr; the sum is discarded */
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp), %rax	/* rax = dst_err_ptr; the sum is discarded */
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
ENDPROC(csum_partial_copy_generic)