Loading...
/* checksum.S: Sparc V9 optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <asm/errno.h>
#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/asi.h>
#include <asm/page.h>
#include <asm/asm_offsets.h>

	/* The problem with the "add with carry" instructions on Ultra
	 * is twofold.  Firstly, they cannot pair with any other
	 * instruction, and also they only add in the 32-bit carry
	 * condition bit into the accumulated sum.  The following is
	 * much better.  For larger chunks we use VIS code, which is
	 * faster.
	 */

	/* Register roles, as set up by the entry point below. */
#define src o0
#define dst o1
#define len o2
#define sum o3

	.text

	/* CSUMCOPY_LASTCHUNK(off, t0, t1): copy and sum one 16-byte chunk.
	 *
	 * Loads two 64-bit words through %asi from [%src - off - 8] and
	 * [%src - off], adds each one into the 64-bit accumulator %sum
	 * (adding 1 back in when the 64-bit carry is set), and writes
	 * each word to the destination as two 32-bit stores.
	 *
	 * Clobbers t0, t1 and the condition codes.
	 *
	 * The expansion is exactly 16 instructions (64 bytes).  The
	 * computed jump before cctbl relies on that size, so do not add
	 * or remove instructions.  Keep comments out of the macro body:
	 * it expands onto a single line.
	 */
#define CSUMCOPY_LASTCHUNK(off, t0, t1)							\
	ldxa		[%src - off - 0x08] %asi, t0;					\
	ldxa		[%src - off - 0x00] %asi, t1;					\
	nop; nop;									\
	addcc		t0, %sum, %sum;							\
	stw		t0, [%dst - off - 0x04];					\
	srlx		t0, 32, t0;							\
	bcc,pt		%xcc, 51f;							\
	 stw		t0, [%dst - off - 0x08];					\
	add		%sum, 1, %sum;							\
51:	addcc		t1, %sum, %sum;							\
	stw		t1, [%dst - off + 0x04];					\
	srlx		t1, 32, t1;							\
	bcc,pt		%xcc, 52f;							\
	 stw		t1, [%dst - off - 0x00];					\
	add		%sum, 1, %sum;							\
52:

	/* cpc_start .. cpc_end is the address range named by the
	 * __ex_table entry at the bottom of this file.
	 */
cpc_start:

	/* cc_end_cruft: copy and sum the trailing (len & 0xf) bytes.
	 *
	 * In:  %g7 = len & 0xf, %src/%dst point at the remaining bytes.
	 *
	 * An 8-byte remainder is added to %sum directly.  The 4-, 2- and
	 * 1-byte remainders are collected into one 64-bit value (word in
	 * bits 63..32, halfword in bits 31..16, byte in bits 15..8) and
	 * added with a single addcc.  Leaves through ccfold with
	 * %g4 = %uhi(PAGE_OFFSET), which ccfold shifts into place.
	 */
cc_end_cruft:
	andcc		%g7, 8, %g0			! IEU1	Group
	be,pn		%icc, 1f			! CTI
	 and		%g7, 4, %g5			! IEU0
	ldxa		[%src + 0x00] %asi, %g2		! Load	Group
	add		%dst, 8, %dst			! IEU0
	add		%src, 8, %src			! IEU1
	addcc		%g2, %sum, %sum			! IEU1	Group + 2 bubbles
	stw		%g2, [%dst - 0x04]		! Store
	srlx		%g2, 32, %g2			! IEU0
	bcc,pt		%xcc, 1f			! CTI	Group
	 stw		%g2, [%dst - 0x08]		! Store
	add		%sum, 1, %sum			! IEU0
1:	brz,pt		%g5, 1f				! CTI	Group
	 clr		%g2				! IEU0
	lduwa		[%src + 0x00] %asi, %g2		! Load
	add		%dst, 4, %dst			! IEU0	Group
	add		%src, 4, %src			! IEU1
	stw		%g2, [%dst - 0x04]		! Store	Group + 2 bubbles
	sllx		%g2, 32, %g2			! IEU0
1:	andcc		%g7, 2, %g0			! IEU1
	be,pn		%icc, 1f			! CTI	Group
	 clr		%o4				! IEU1
	lduha		[%src + 0x00] %asi, %o4		! Load
	add		%src, 2, %src			! IEU0	Group
	add		%dst, 2, %dst			! IEU1
	sth		%o4, [%dst - 0x2]		! Store Group + 2 bubbles
	sll		%o4, 16, %o4			! IEU0
1:	andcc		%g7, 1, %g0			! IEU1
	be,pn		%icc, 1f			! CTI	Group
	 clr		%o5				! IEU0
	lduba		[%src + 0x00] %asi, %o5		! Load
	stb		%o5, [%dst + 0x00]		! Store	Group + 2 bubbles
	sll		%o5, 8, %o5			! IEU0
1:	or		%g2, %o4, %o4			! IEU1
	or		%o5, %o4, %o4			! IEU0	Group
	addcc		%o4, %sum, %sum			! IEU1
	bcc,pt		%xcc, ccfold			! CTI
	 sethi		%uhi(PAGE_OFFSET), %g4		! IEU0	Group
	b,pt		%xcc, ccfold			! CTI
	 add		%sum, 1, %sum			! IEU1

	/* cc_fixit: reached from the entry point when src is not 8-byte
	 * aligned.  Copies and sums a leading halfword and/or word until
	 * (src & 6) == 0, then rejoins the main path at ccmerge with
	 * %g1 = len & 0xf0.  Lengths below 6 skip the alignment and go
	 * straight to ccte with %g7 = len & 0xf.
	 *
	 * Uses %g4 as a scratch register; ccte reloads it.
	 *
	 * NOTE(review): with len of 4 or 5 and src at 2 mod 4, the lduwa
	 * in cc_end_cruft sees a source address that is not 4-byte
	 * aligned -- confirm this is acceptable for callers.
	 */
cc_fixit:
	cmp		%len, 6				! IEU1	Group
	bl,a,pn		%icc, ccte			! CTI
	 andcc		%len, 0xf, %g7			! IEU1	Group
	andcc		%src, 2, %g0			! IEU1	Group
	be,pn		%icc, 1f			! CTI
	 andcc		%src, 0x4, %g0			! IEU1	Group
	lduha		[%src + 0x00] %asi, %g4		! Load
	sub		%len, 2, %len			! IEU0
	add		%src, 2, %src			! IEU0	Group
	add		%dst, 2, %dst			! IEU1
	sll		%g4, 16, %g3			! IEU0	Group + 1 bubble
	addcc		%g3, %sum, %sum			! IEU1
	/* %sum was zero-extended to 32 bits at entry and %g3 is a
	 * halfword shifted left by 16, so the 64-bit carry can never
	 * be set here.  The sequence below rebuilds %sum from its low
	 * 32 bits only, so the carry that matters is the one out of
	 * bit 31: test %icc.  (This branch used to test %xcc, which
	 * made the fix-up add unreachable and dropped that carry.)
	 */
	bcc,pt		%icc, 0f			! CTI
	 srl		%sum, 16, %g3			! IEU0	Group
	add		%g3, 1, %g3			! IEU0	4 clocks (mispredict)
0:	andcc		%src, 0x4, %g0			! IEU1	Group
	sth		%g4, [%dst - 0x2]		! Store
	sll		%sum, 16, %sum			! IEU0
	sll		%g3, 16, %g3			! IEU0	Group
	srl		%sum, 16, %sum			! IEU0	Group
	or		%g3, %sum, %sum			! IEU0	Group (regdep)
1:	be,pt		%icc, ccmerge			! CTI
	 andcc		%len, 0xf0, %g1			! IEU1
	lduwa		[%src + 0x00] %asi, %g4		! Load	Group
	sub		%len, 4, %len			! IEU0
	add		%src, 4, %src			! IEU1
	add		%dst, 4, %dst			! IEU0	Group
	addcc		%g4, %sum, %sum			! IEU1	Group + 1 bubble
	stw		%g4, [%dst - 0x4]		! Store
	bcc,pt		%xcc, ccmerge			! CTI
	 andcc		%len, 0xf0, %g1			! IEU1	Group
	b,pt		%xcc, ccmerge			! CTI	4 clocks (mispredict)
	 add		%sum, 1, %sum			! IEU0

	/* csum_partial_copy_sparc64: copy len bytes from src to dst
	 * while accumulating a checksum.  Source loads go through %asi.
	 *
	 * In:   %o0 = src, %o1 = dst, %o2 = len, %o3 = sum
	 *       (len and sum are zero-extended to 32 bits on entry)
	 * Out:  %o0 = 32-bit folded sum
	 *       %g4 = %uhi(PAGE_OFFSET) << 32 on the ccfold exit
	 * Uses: %g1-%g5, %g7, %o4, %o5, condition codes
	 *
	 * Dispatch:
	 *   (src ^ dst) & 3 != 0, or src odd  -> ccslow
	 *   len >= 256                        -> csum_partial_copy_vis
	 *   src & 7 != 0                      -> cc_fixit
	 *   otherwise                         -> ccmerge
	 */
	.align		32
	.globl		csum_partial_copy_sparc64
csum_partial_copy_sparc64:			/* %o0=src, %o1=dest, %o2=len, %o3=sum */
	xorcc		%src, %dst, %o4			! IEU1	Group
	srl		%sum, 0, %sum			! IEU0
	andcc		%o4, 3, %g0			! IEU1	Group
	srl		%len, 0, %len			! IEU0
	bne,pn		%icc, ccslow			! CTI
	 andcc		%src, 1, %g0			! IEU1	Group
	bne,pn		%icc, ccslow			! CTI
	 cmp		%len, 256			! IEU1	Group
	bgeu,pt		%icc, csum_partial_copy_vis	! CTI
	 andcc		%src, 7, %g0			! IEU1	Group
	bne,pn		%icc, cc_fixit			! CTI
	 andcc		%len, 0xf0, %g1			! IEU1	Group

	/* ccmerge: %g1 = len & 0xf0 is the number of bytes handled in
	 * 16-byte chunks.  Each chunk is 64 bytes of code, so the jump
	 * target is 12f - (%g1 * 4): execution enters cctbl part way
	 * through and runs exactly %g1 / 16 chunks.  %src and %dst are
	 * advanced past the chunks up front; the macro uses negative
	 * offsets from them.
	 */
ccmerge:	be,pn		%icc, ccte			! CTI
	 andcc		%len, 0xf, %g7			! IEU1	Group
	sll		%g1, 2, %o4			! IEU0
13:	sethi		%hi(12f), %o5			! IEU0	Group
	add		%src, %g1, %src			! IEU1
	sub		%o5, %o4, %o5			! IEU0	Group
	jmpl		%o5 + %lo(12f), %g0		! CTI	Group brk forced
	 add		%dst, %g1, %dst			! IEU0	Group
cctbl:	CSUMCOPY_LASTCHUNK(0xe8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xd8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xc8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xb8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0xa8,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x98,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x88,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x78,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x68,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x58,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x48,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x38,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x28,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x18,%g2,%g3)
	CSUMCOPY_LASTCHUNK(0x08,%g2,%g3)
12:
	andcc		%len, 0xf, %g7			! IEU1	Group

	/* ccte: if any tail bytes remain (%g7 != 0) go to cc_end_cruft.
	 * The delay slot always executes, so %g4 holds
	 * %uhi(PAGE_OFFSET) on both paths.
	 */
ccte:	bne,pn		%icc, cc_end_cruft		! CTI
	 sethi		%uhi(PAGE_OFFSET), %g4		! IEU0

	/* ccfold: fold the 64-bit accumulator to 32 bits.  Adds the low
	 * word to the high word, adds 1 if that addition carried, and
	 * returns the result in %o0.  The retl delay slot completes the
	 * %g4 reload.
	 */
ccfold:	sllx		%sum, 32, %o0			! IEU0	Group
	addcc		%sum, %o0, %o0			! IEU1	Group (regdep)
	srlx		%o0, 32, %o0			! IEU0	Group (regdep)
	bcs,a,pn	%xcc, 1f			! CTI
	 add		%o0, 1, %o0			! IEU1	4 clocks (mispredict)
1:	retl						! CTI	Group brk forced
	 sllx		%g4, 32, %g4			! IEU0	Group

	/* ccslow: path taken when src is odd or when src and dst differ
	 * in their low two address bits.  The source is read with the
	 * widest naturally aligned load available; the destination is
	 * always written one byte at a time.
	 *
	 *   %g5 = running sum of the copied data (32-bit carries)
	 *   %o5 = src & 1 at entry; non-zero means the 16-bit result is
	 *         byte-swapped before it is added to %sum
	 *   %g7 = halfword count, later word count
	 *
	 * At 1: (the last one) %g5 is folded to 16 bits; at 4: it is
	 * added to %sum with a 32-bit end-around carry and returned
	 * zero-extended in %o0.  A zero len returns sum unchanged.
	 */
ccslow:	mov		0, %g5
	brlez,pn	%len, 4f
	 andcc		%src, 1, %o5
	be,a,pt		%icc, 1f
	 srl		%len, 1, %g7
	sub		%len, 1, %len			! odd src: take one byte first
	lduba		[%src] %asi, %g5
	add		%src, 1, %src
	stb		%g5, [%dst]
	srl		%len, 1, %g7
	add		%dst, 1, %dst
1:	brz,a,pn	%g7, 3f
	 andcc		%len, 1, %g0
	andcc		%src, 2, %g0
	be,a,pt		%icc, 1f
	 srl		%g7, 1, %g7
	lduha		[%src] %asi, %o4		! one halfword to reach 4-byte alignment
	sub		%len, 2, %len
	srl		%o4, 8, %g2
	sub		%g7, 1, %g7
	stb		%g2, [%dst]
	add		%o4, %g5, %g5
	stb		%o4, [%dst + 1]
	add		%src, 2, %src
	srl		%g7, 1, %g7
	add		%dst, 2, %dst
1:	brz,a,pn	%g7, 2f
	 andcc		%len, 2, %g0
	lduwa		[%src] %asi, %o4
5:	srl		%o4, 24, %g2			! word loop, %g7 iterations
	srl		%o4, 16, %g3
	stb		%g2, [%dst]
	srl		%o4, 8, %g2
	stb		%g3, [%dst + 1]
	add		%src, 4, %src
	stb		%g2, [%dst + 2]
	addcc		%o4, %g5, %g5
	stb		%o4, [%dst + 3]
	addc		%g5, %g0, %g5
	add		%dst, 4, %dst
	subcc		%g7, 1, %g7
	bne,a,pt	%icc, 5b
	 lduwa		[%src] %asi, %o4
	sll		%g5, 16, %g2			! %g5 = low half + high half
	srl		%g5, 16, %g5
	srl		%g2, 16, %g2
	andcc		%len, 2, %g0
	add		%g2, %g5, %g5
2:	be,a,pt		%icc, 3f
	 andcc		%len, 1, %g0
	lduha		[%src] %asi, %o4		! trailing halfword
	andcc		%len, 1, %g0
	srl		%o4, 8, %g2
	add		%src, 2, %src
	stb		%g2, [%dst]
	add		%g5, %o4, %g5
	stb		%o4, [%dst + 1]
	add		%dst, 2, %dst
3:	be,a,pt		%icc, 1f
	 sll		%g5, 16, %o4
	lduba		[%src] %asi, %g2		! trailing byte, summed as a high byte
	sll		%g2, 8, %o4
	stb		%g2, [%dst]
	add		%g5, %o4, %g5
	sll		%g5, 16, %o4
1:	addcc		%o4, %g5, %g5			! fold %g5 to 16 bits
	srl		%g5, 16, %o4
	addc		%g0, %o4, %g5
	brz,pt		%o5, 4f
	 srl		%g5, 8, %o4
	and		%g5, 0xff, %g2			! src was odd: swap the two bytes
	and		%o4, 0xff, %o4
	sll		%g2, 8, %g2
	or		%g2, %o4, %g5
4:	addcc		%sum, %g5, %sum
	addc		%g0, %sum, %o0
	retl
	 srl		%o0, 0, %o0
cpc_end:

	/* cpc_handler: handler named in the __ex_table entry below for
	 * the range cpc_start .. cpc_end.
	 *
	 * Loads a pointer from [%sp + 0x7ff + 128]; if it is non-zero,
	 * stores -EFAULT through it as a 32-bit value.  Then writes the
	 * byte at AOFF_task_thread + AOFF_thread_current_ds off %g6 into
	 * %asi, sets %g4 = %uhi(PAGE_OFFSET) << 32 and returns with
	 * retl.  %o0 is not written here.
	 *
	 * NOTE(review): presumably the caller parks an error pointer in
	 * that stack slot before calling in -- confirm against the C
	 * wrappers.
	 */
	.globl		cpc_handler
cpc_handler:
	ldx		[%sp + 0x7ff + 128], %g1
	ldub		[%g6 + AOFF_task_thread + AOFF_thread_current_ds], %g3
	sub		%g0, EFAULT, %g2
	brnz,a,pt	%g1, 1f
	 st		%g2, [%g1]
1:	sethi		%uhi(PAGE_OFFSET), %g4
	wr		%g3, %g0, %asi
	retl
	 sllx		%g4, 32, %g4

	.section	__ex_table
	.align		4
	.word		cpc_start, 0, cpc_end, cpc_handler