/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/alpha/lib/memset.S
 *
 * This is an efficient (and small) implementation of the C library "memset()"
 * function for the alpha.
 *
 *	(C) Copyright 1996 Linus Torvalds
 *
 * This routine is "moral-ware": you are free to use it any way you wish, and
 * the only obligation I put on you is a moral one: if you make any improvements
 * to the routine, please send me your improvements for me to use similarly.
 *
 * The scheduling comments are according to the EV5 documentation (and done by
 * hand, so they might well be incorrect, please do tell me about it..)
 */

/*
 * Entry points and register usage, as established by the code below:
 *
 *   ___memset (aliases: memset, __memset)
 *	$16 = destination address
 *	$17 = fill value; only the low byte is used, and it is replicated
 *	      into all eight bytes of $17
 *	$18 = byte count
 *	Falls through into __constant_c_memset.
 *
 *   __constant_c_memset
 *	Same as ___memset, except that $17 must already hold the complete
 *	64-bit fill pattern.
 *
 *   __memset16
 *	The low 16 bits of $17 are replicated into all four 16-bit words of
 *	$17, then control branches to __constant_c_memset.  $18 is passed
 *	through unchanged and is therefore consumed as a byte count.
 *
 * All entry points return the original destination address in $0.
 * A count that is <= 0 when read as a signed quadword stores nothing.
 * Registers that may be written: $0-$7 and $16-$18.  No stack frame is
 * allocated (.frame size is 0).
 *
 * The file is assembled with ".set noreorder", so the instruction order
 * below is exactly the order that is issued; the E0/E1 notes on each line
 * name the intended issue pipe.
 */
#include <asm/export.h>
	.set noat
	.set noreorder
.text
	.globl memset
	.globl __memset
	.globl ___memset
	.globl __memset16
	.globl __constant_c_memset

	.ent ___memset
.align 5
___memset:
	.frame $30,0,$26,0
	.prologue 0

	/*
	 * Build the 64-bit fill pattern in $17:
	 * byte -> 16 bits (and/insbl/bis), -> 32 bits (sll 16/bis),
	 * -> 64 bits (sll 32/bis).
	 */
	and $17,255,$1		/* E1 */
	insbl $17,1,$17		/* .. E0 */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	sll $17,16,$1		/* E1 (p-c latency, next cycle) */

	/*
	 * The closing ldq_u targets $31 (the zero register), so its result
	 * is discarded; it only occupies the remaining issue slot.
	 */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	sll $17,32,$1		/* E1 (p-c latency, next cycle) */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	ldq_u $31,0($30)	/* .. E1 */

.align 5
__constant_c_memset:
	/*
	 * $6 = exclusive end address ($16 + $18), $0 = return value.
	 * $1 = start XOR end; once the low three bits are cleared it is zero
	 * exactly when both addresses lie in the same aligned quadword.
	 */
	addq $18,$16,$6		/* E0 */
	bis $16,$16,$0		/* .. E1 */
	xor $16,$6,$1		/* E0 */
	ble $18,end		/* .. E1 */

	/* $3 = byte offset of the destination inside its quadword. */
	bic $1,7,$1		/* E0 */
	beq $1,within_one_quad	/* .. E1 (note EV5 zero-latency forwarding) */
	and $16,7,$3		/* E0 */
	beq $3,aligned		/* .. E1 (note EV5 zero-latency forwarding) */

	/*
	 * Unaligned head: merge the pattern into the upper bytes of the first
	 * quadword.  $5 keeps the original (unaligned) address for the store,
	 * $2 = pattern shifted up to the start offset, and $3 becomes
	 * offset - 8, i.e. minus the number of head bytes written.
	 */
	ldq_u $4,0($16)		/* E0 */
	bis $16,$16,$5		/* .. E1 */
	insql $17,$16,$2	/* E0 */
	subq $3,8,$3		/* .. E1 */

	/* mskql keeps the original bytes that lie below the start offset. */
	addq $18,$3,$18		/* E0 	$18 is new count ($3 is negative) */
	mskql $4,$16,$4		/* .. E1 (and possible load stall) */
	subq $16,$3,$16		/* E0 	$16 is new aligned destination */
	bis $2,$4,$1		/* .. E1 */

	/* Store the merged head quadword; the other slots are no-ops. */
	bis $31,$31,$31		/* E0 */
	ldq_u $31,0($30)	/* .. E1 */
	stq_u $1,0($5)		/* E0 */
	bis $31,$31,$31		/* .. E1 */

.align 4
aligned:
	/*
	 * $16 is quadword aligned here.
	 * $3 = number of whole quadwords, $18 = leftover bytes (0..7),
	 * $5 = running store pointer.
	 */
	sra $18,3,$3		/* E0 */
	and $18,7,$18		/* .. E1 */
	bis $16,$16,$5		/* E0 */
	beq $3,no_quad		/* .. E1 */

.align 3
loop:
	/* One aligned quadword store per iteration. */
	stq $17,0($5)		/* E0 */
	subq $3,1,$3		/* .. E1 */
	addq $5,8,$5		/* E0 */
	bne $3,loop		/* .. E1 */

no_quad:
	/*
	 * Tail: if bytes remain, read the final quadword at $5 and keep its
	 * bytes at and above the end offset ($6 & 7) ...
	 */
	bis $31,$31,$31		/* E0 */
	beq $18,end		/* .. E1 */
	ldq $7,0($5)		/* E0 */
	mskqh $7,$6,$2		/* .. E1 (and load stall) */

	/* ... then put the pattern in the bytes below it and store. */
	insqh $17,$6,$4		/* E0 */
	bis $2,$4,$1		/* .. E1 */
	stq $1,0($5)		/* E0 */
	ret $31,($26),1		/* .. E1 */

.align 3
within_one_quad:
	/*
	 * Start and end fall in the same quadword.  First form
	 * "original bytes below the start offset | pattern from the start
	 * offset upwards" in $2.
	 */
	ldq_u $1,0($16)		/* E0 */
	insql $17,$16,$2	/* E1 */
	mskql $1,$16,$4		/* E0 (after load stall) */
	bis $2,$4,$2		/* E0 */

	/*
	 * Then cut that off at the end offset and restore the original bytes
	 * from the end offset upwards before storing the quadword back.
	 */
	mskql $2,$6,$4		/* E0 */
	mskqh $1,$6,$2		/* .. E1 */
	bis $2,$4,$1		/* E0 */
	stq_u $1,0($16)		/* E0 */

end:
	ret $31,($26),1		/* E1 */
	.end ___memset
	EXPORT_SYMBOL(___memset)
	EXPORT_SYMBOL(__constant_c_memset)

	.align 5
	.ent __memset16
__memset16:
	.prologue 0

	/*
	 * Replicate the low 16 bits of $17 into word positions 0, 2, 4 and 6
	 * (byte offsets) and OR them together into $17, then reuse the common
	 * fill code.
	 *
	 * NOTE(review): the aligned loop stores the pattern with its low
	 * halfword at every 8-byte boundary, so this appears to require a
	 * 2-byte-aligned destination -- confirm against callers.
	 */
	inswl $17,0,$1		/* E0 */
	inswl $17,2,$2		/* E0 */
	inswl $17,4,$3		/* E0 */
	or $1,$2,$1		/* .. E1 */
	inswl $17,6,$4		/* E0 */
	or $1,$3,$1		/* .. E1 */
	or $1,$4,$17		/* E0 */
	br __constant_c_memset	/* .. E1 */

	.end __memset16
	EXPORT_SYMBOL(__memset16)

/* memset and __memset are plain aliases for ___memset. */
memset = ___memset
__memset = ___memset
	EXPORT_SYMBOL(memset)
	EXPORT_SYMBOL(__memset)