/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * rax original destination
 */

/*
 * rep-string variant: copy count/8 qwords with rep movsq, then the
 * remaining count%8 bytes with rep movsb.  Patched in over the start of
 * memcpy by the alternatives mechanism below on CPUs that advertise
 * X86_FEATURE_REP_GOOD.
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax

	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	rep movsq
	movl %edx,%ecx
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)

ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0

	movq %rdi,%rax		/* return the original destination */

	movl %edx,%ecx
	shrl $6,%ecx		/* number of 64-byte blocks */
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * Main loop: copy 64 bytes per iteration as eight 8-byte
	 * loads/stores.  leaq does not modify flags, so the jnz at the
	 * bottom still tests the decl done here.
	 */
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz  .Lloop_64

.Lhandle_tail:
	/* copy any remaining full qwords of the count%64 tail, 8 bytes at a time */
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz   .Lhandle_7

	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz  .Lloop_8

.Lhandle_7:
	/* copy the final 0-7 bytes one at a time */
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende

	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/*
	 * Some CPUs run faster using the string copy instructions.
	 * It is also a lot simpler.  Use this when possible.
	 */

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous

	.section .altinstructions,"a"
	.align 8
	.quad memcpy				/* code to patch */
	.quad 1b				/* replacement: jmp memcpy_c */
	.byte X86_FEATURE_REP_GOOD
	/*
	 * Replace only the beginning: memcpy is used to apply the
	 * alternatives themselves, so it is silly to overwrite itself
	 * with nops - a reboot is the only outcome...
	 */
	.byte 2b - 1b				/* length to patch */
	.byte 2b - 1b				/* replacement length */
	.previous
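
/*
 * For reference, a rough sketch of the record format that
 * apply_alternatives() reads from .altinstructions in kernels of this
 * vintage.  Field names and padding here are an assumption; see the
 * kernel's <asm/alternative.h> for the authoritative definition.
 *
 *	struct alt_instr {
 *		u8 *instr;		// .quad memcpy: code to patch
 *		u8 *replacement;	// .quad 1b: replacement bytes
 *		u8  cpuid;		// X86_FEATURE_REP_GOOD
 *		u8  instrlen;		// 2b - 1b: bytes of memcpy patched
 *		u8  replacementlen;	// 2b - 1b: bytes copied in
 *	};
 *
 * At boot, if the CPU advertises REP_GOOD, the first two bytes of memcpy
 * are overwritten with the short "jmp memcpy_c" assembled at label 1
 * above; otherwise memcpy runs unmodified.
 */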