/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

	.text
	.p2align	4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# In:	GR8  = p	(never written here, so it is also the return value)
#	GR9  = ch	(masked to 8 bits on entry)
#	GR10 = count	(never written here; the working copy lives in GR5)
#
# Working registers:
#	GR4       = address tracking pointer
#	GR5       = bytes still to be written
#	GR6       = size of the step currently being subtracted from GR5
#	GR7       = index register for the store-with-update insns
#	GR12/GR13 = fill pattern replicated across a register pair
#	ICC3      = result of the last count update; "equal" means nothing is
#	            left to write, which is what every "beqlr icc3" tests
#
# - NOTE: must not use any stack.  Exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set.
#         GR4, GR7, GR8, and GR11 must be managed.
#
# - NOTE: GR7 is first written here at the start of the 8b-aligned loop.  The
#         three alignment stores before that address memory as @(GR4,GR0), so
#         a caller whose fixup routine consumes GR4+GR7 must supply GR7 = 0 on
#         entry (__memset_user below does so).
#
###############################################################################
	.globl		memset,__memset_end
	.type		memset,@function
memset:
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; nothing to do if count == 0

	# conditionally write a byte to 2b-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; CC7 = address has bit 0 set
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word to 4b-align the address
	# - only done if address bit 1 is set AND at least 2 bytes remain
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7			; combine both conditions into CC7

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	# - only done if address bit 2 is set AND at least 4 bytes remain
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7			; combine both conditions into CC7

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# - from here on every store goes to GR4+GR7, so GR4 is kept one step
	#   behind the next address to be written
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7			; CC7 = at least 64 bytes remain
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; test for a further 64b chunk
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6				; step size for the 16-byte remnant
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0		; pre-test for the 16-byte remnant
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7				; index for the 4-byte remnant
	beqlr		icc3,#0

	# now do 4-byte remnant
	# - GR4 is advanced by 4 so that GR4+GR7 addresses the next unwritten byte
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4
	cknc		icc0,cc7
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1			; pre-test for the 2-byte remnant
	beqlr		icc3,#0

	# now do 2-byte remnant
	setlos		#2,gr7
	addi.p		gr4,#2,gr4
	cknc		icc1,cc7
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0			; pre-test for the 1-byte remnant
	beqlr		icc3,#0

	# now do 1-byte remnant
	# - GR7 is zeroed because this store addresses @(GR4,GR0); that keeps
	#   GR4+GR7 equal to the address being written
	setlos		#0,gr7
	addi.p		gr4,#2,gr4
	cknc		icc0,cc7
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
__memset_end:

	.size		memset, __memset_end-memset

###############################################################################
#
# clear memory in userspace
# - return the number of bytes that could not be cleared (0 on complete success)
#
# long __memset_user(void *p, size_t count)
#
# In:	GR8 = p, GR9 = count
# Out:	GR8 = 0 on success, else the number of bytes left uncleared
#
# LR is parked in GR11 across the call to memset; both exit paths return
# through GR11.
#
###############################################################################
	.globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
	.type		__memset_user,@function
__memset_user:
	movsg		lr,gr11

	# abuse memset to do the dirty work
	# - shuffle (p, count) into memset's (p, ch = 0, count) argument layout
	or.p		gr9,gr9,gr10
	setlos		#0,gr9

	# The error handler below always computes GR4+GR7, but memset does not
	# write GR7 until it reaches its 8b-aligned loop, and its byte/word/dword
	# alignment stores before that point address @(GR4,GR0).  Give GR7 a
	# defined value of zero so that a fault on one of those early stores
	# yields the correct remaining-byte count rather than one derived from
	# whatever our caller happened to leave in GR7.
	setlos		#0,gr7
	call		memset
__memset_user_error_lr:
	jmpl.p		@(gr11,gr0)
	setlos		#0,gr8				; complete success: nothing left uncleared

	# deal with any exception generated by memset
	# GR4  - memset's address tracking pointer
	# GR7  - memset's step value (index register for store insns)
	# GR8  - memset's original start address
	# GR10 - memset's original count
__memset_user_error_handler:
	add.p		gr4,gr7,gr4			; GR4 = GR4 + GR7 (address being stored to)
	add		gr8,gr10,gr8			; GR8 = start + count (end of the region)
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8			; we return the amount left uncleared

	.size		__memset_user, .-__memset_user