/* Copyright 2002 Andi Kleen */

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * rax original destination
 */

// #define FIX_ALIGNMENT

	.globl __memcpy
	.globl memcpy
	.p2align
__memcpy:
memcpy:
	pushq %rbx
	movq %rdi,%rax

#ifdef FIX_ALIGNMENT
	movl %edi,%ecx
	andl $7,%ecx
	jnz  bad_alignment
after_bad_alignment:
#endif

	movq %rdx,%rcx
	movl $64,%ebx
	shrq $6,%rcx
	jz   handle_tail

loop_64:
	movq (%rsi),%r11
	movq 8(%rsi),%r8
	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10
	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)
	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8
	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10
	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)
	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	addq %rbx,%rsi
	addq %rbx,%rdi
	decl %ecx
	jnz  loop_64

handle_tail:
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz   handle_7
	movl $8,%ebx
loop_8:
	movq (%rsi),%r8
	movq %r8,(%rdi)
	addq %rbx,%rdi
	addq %rbx,%rsi
	decl %ecx
	jnz  loop_8

handle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz   ende
loop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz  loop_1

ende:
	sfence
	popq %rbx
	ret

#ifdef FIX_ALIGNMENT
	/* align destination */
	/* This is simpleminded. For bigger blocks it may make sense to align
	   src and dst to their aligned subset and handle the rest separately */
bad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	subq %r9,%rdx
	js   small_alignment
	jz   small_alignment
align_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz  align_1
	jmp  after_bad_alignment
small_alignment:
	addq %r9,%rdx
	jmp  handle_7
#endif
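
The routine above splits the copy into three phases: 64-byte chunks (loop_64), remaining 8-byte words (loop_8), and trailing single bytes (loop_1), and returns the original destination in rax. The following C fragment is only a rough sketch of that control flow for readers who prefer C; it is not part of the file above, and the name memcpy_sketch is made up for illustration. The memcpy(&t, ...) calls stand in for the unaligned movq loads and stores that x86-64 performs directly.

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Illustrative sketch only; not the kernel implementation. */
static void *memcpy_sketch(void *dst, const void *src, size_t count)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	uint64_t t;

	for (size_t n = count >> 6; n; n--) {          /* loop_64: 64 bytes per iteration */
		for (int i = 0; i < 8; i++) {
			memcpy(&t, s + 8 * i, 8);      /* stands in for the movq load  */
			memcpy(d + 8 * i, &t, 8);      /* stands in for the movq store */
		}
		s += 64;
		d += 64;
	}
	for (size_t n = (count & 63) >> 3; n; n--) {   /* loop_8: remaining 8-byte words */
		memcpy(&t, s, 8);
		memcpy(d, &t, 8);
		s += 8;
		d += 8;
	}
	for (size_t n = count & 7; n; n--)             /* loop_1: trailing bytes */
		*d++ = *s++;

	return dst;                                    /* rax: the original destination */
}

int main(void)
{
	char src[100], dst[100];
	for (int i = 0; i < 100; i++)
		src[i] = (char)i;
	memcpy_sketch(dst, src, sizeof(src));
	printf("%d %d\n", dst[0], dst[99]);            /* expect: 0 99 */
	return 0;
}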