/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In practice len == 5, but this is not guaranteed.  So this code does not
 * attempt to use doubleword instructions.
 */
_GLOBAL(ip_fast_csum)
	lwz	r0,0(r3)
	lwzu	r5,4(r3)
	addic.	r4,r4,-2
	addc	r0,r0,r5
	mtctr	r4
	blelr-
1:	lwzu	r4,4(r3)
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0		/* add in final carry */
	rldicl	r4,r0,32,0	/* fold two 32-bit halves together */
	add	r0,r0,r4
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3
	srwi	r3,r3,16
	blr

/*
 * Compute checksum of TCP or UDP pseudo-header:
 *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
 * No real gain trying to do this specially for 64 bit, but
 * the 32 bit addition may spill into the upper bits of
 * the doubleword so we still must fold it down from 64.
 */
_GLOBAL(csum_tcpudp_magic)
	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
	addc	r0,r3,r4	/* add 4 32-bit words together */
	adde	r0,r0,r5
	adde	r0,r0,r7
	rldicl	r4,r0,32,0	/* fold 64 bit value */
	add	r0,r4,r0
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3
	srwi	r3,r3,16
	blr

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * This code assumes at least halfword alignment, though the length
 * can be any number of bytes.  The sum is accumulated in r5.
 *
 * csum_partial(r3=buff, r4=len, r5=sum)
 */
_GLOBAL(csum_partial)
	subi	r3,r3,8		/* we'll offset by 8 for the loads */
	srdi.	r6,r4,3		/* divide by 8 for doubleword count */
	addic	r5,r5,0		/* clear carry */
	beq	3f		/* if we're doing < 8 bytes */
	andi.	r0,r3,2		/* aligned on a word boundary already? */
	beq+	1f
	lhz	r6,8(r3)	/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r4,r4,2
	addc	r5,r5,r6
	srdi.	r6,r4,3		/* recompute number of doublewords */
	beq	3f		/* any left? */
1:	mtctr	r6
2:	ldu	r6,8(r3)	/* main sum loop */
	adde	r5,r5,r6
	bdnz	2b
	andi.	r4,r4,7		/* compute bytes left to sum after doublewords */
3:	cmpi	0,r4,4		/* is at least a full word left? */
	blt	4f
	lwz	r6,8(r3)	/* sum this word */
	addi	r3,r3,4
	subi	r4,r4,4
	adde	r5,r5,r6
4:	cmpi	0,r4,2		/* is at least a halfword left? */
	blt+	5f
	lhz	r6,8(r3)	/* sum this halfword */
	addi	r3,r3,2
	subi	r4,r4,2
	adde	r5,r5,r6
5:	cmpi	0,r4,1		/* is at least a byte left? */
	bne+	6f
	lbz	r6,8(r3)	/* sum this byte */
	slwi	r6,r6,8		/* this byte is assumed to be the upper byte of a halfword */
	adde	r5,r5,r6
6:	addze	r5,r5		/* add in final carry */
	rldicl	r4,r5,32,0	/* fold two 32-bit halves together */
	add	r3,r4,r5
	srdi	r3,r3,32
	blr
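/*
 * For reference, a minimal C sketch (not part of the original file; the
 * helper name is made up) of the fold-and-complement sequence that
 * ip_fast_csum and csum_tcpudp_magic end with.  csum_partial stops after
 * the 32-bit fold and returns the uncomplemented sum.  The assembly does
 * each end-around carry with a single rotate-and-add, since the rotate
 * duplicates both halves; plain C needs two folds per step.  Guarded by
 * #if 0 so the file still assembles.
 */
#if 0
static unsigned short fold_and_complement(unsigned long sum)
{
	/* fold the two 32-bit halves, twice to absorb the carry */
	sum = (sum & 0xffffffffUL) + (sum >> 32);
	sum = (sum & 0xffffffffUL) + (sum >> 32);
	/* fold the two 16-bit halves, twice to absorb the carry */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short)~sum;	/* 1's complement result */
}
#endif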
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * This code needs to be reworked to take advantage of 64 bit sum+copy.
 * However, due to tokenring halfword alignment problems this will be very
 * tricky.  For now we'll leave it until we instrument it somehow.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 */
_GLOBAL(csum_partial_copy_generic)
	addic	r0,r6,0
	subi	r3,r3,4
	subi	r4,r4,4
	srwi.	r6,r5,2
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r9,r4,2		/* Align dst to longword boundary */
	beq+	1f
81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r5,r5,2
91:	sth	r6,4(r4)
	addi	r4,r4,2
	addc	r0,r0,r6
	srwi.	r6,r5,2		/* # words to do */
	beq	3f
1:	mtctr	r6
82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
	adde	r0,r0,r6
	bdnz	82b
	andi.	r5,r5,3
3:	cmpi	0,r5,2
	blt+	4f
83:	lhz	r6,4(r3)
	addi	r3,r3,2
	subi	r5,r5,2
93:	sth	r6,4(r4)
	addi	r4,r4,2
	adde	r0,r0,r6
4:	cmpi	0,r5,1
	bne+	5f
84:	lbz	r6,4(r3)
94:	stb	r6,4(r4)
	slwi	r6,r6,8		/* Upper byte of word */
	adde	r0,r0,r6
5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
	rldicl	r4,r3,32,0	/* fold 64 bit value */
	add	r3,r4,r3
	srdi	r3,r3,32
	blr

/* These shouldn't go in the fixup section, since that would cause
   the ex_table addresses to get out of order. */

	.globl src_error_1
src_error_1:
	li	r6,0
	subi	r5,r5,2
95:	sth	r6,4(r4)
	addi	r4,r4,2
	srwi.	r6,r5,2
	beq	3f
	mtctr	r6
	.globl src_error_2
src_error_2:
	li	r6,0
96:	stwu	r6,4(r4)
	bdnz	96b
3:	andi.	r5,r5,3
	beq	src_error
	.globl src_error_3
src_error_3:
	li	r6,0
	mtctr	r5
	addi	r4,r4,3
97:	stbu	r6,1(r4)
	bdnz	97b
	.globl src_error
src_error:
	cmpi	0,r7,0
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r7)
1:	addze	r3,r0
	blr
	.globl dst_error
dst_error:
	cmpi	0,r8,0
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r8)
1:	addze	r3,r0
	blr

	.section __ex_table,"a"
	.align	3
	.llong	81b,src_error_1
	.llong	91b,dst_error
	.llong	82b,src_error_2
	.llong	92b,dst_error
	.llong	83b,src_error_3
	.llong	93b,dst_error
	.llong	84b,src_error_3
	.llong	94b,dst_error
	.llong	95b,dst_error
	.llong	96b,dst_error
	.llong	97b,dst_error
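/*
 * A minimal C sketch (illustrative only, not from this file; the
 * function name is made up) of the arithmetic csum_partial_copy_generic
 * performs when no fault occurs: copy len bytes from src to dst while
 * accumulating a 1's complement sum big-endian, with an odd trailing
 * byte treated as the upper byte of a halfword (label 84 above).  The
 * real routine works a word at a time and relies on the __ex_table
 * entries for the -EFAULT and zero-fill behaviour.  Summing halfwords
 * may give a different 32-bit value than the word-at-a-time sum, but it
 * folds to the same 16-bit checksum.  Guarded by #if 0 so the file
 * still assembles.
 */
#if 0
static unsigned int csum_copy_sketch(const unsigned char *src,
				     unsigned char *dst, int len,
				     unsigned int sum)
{
	unsigned long acc = sum;

	while (len >= 2) {		/* copy and sum halfwords */
		acc += ((unsigned int)src[0] << 8) | src[1];
		*dst++ = *src++;
		*dst++ = *src++;
		len -= 2;
	}
	if (len) {			/* odd trailing byte */
		*dst = *src;
		acc += (unsigned int)*src << 8;
	}
	/* fold the 64-bit accumulator down to 32 bits */
	acc = (acc & 0xffffffffUL) + (acc >> 32);
	acc = (acc & 0xffffffffUL) + (acc >> 32);
	return (unsigned int)acc;
}
#endif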