aboutsummaryrefslogtreecommitdiff
path: root/string/arm/strcpy.c
blob: b5728a2534f066c8845e2f52ef5b47d579685394 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*
 * strcpy
 *
 * Copyright (c) 2008-2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#if defined (__thumb2__) && !defined (__thumb__)

/* For GLIBC:
#include <string.h>
#include <memcopy.h>

#undef strcmp
*/

#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif

char* __attribute__((naked))
__strcpy_arm (char* dst, const char* src)
{
  __asm__ (
       "pld	[r1, #0]\n\t"
       "eor	r2, r0, r1\n\t"
       "mov	ip, r0\n\t"
       "tst	r2, #3\n\t"
       "bne	4f\n\t"
       "tst	r1, #3\n\t"
       "bne	3f\n"
  "5:\n\t"
# ifndef __thumb2__
       "str	r5, [sp, #-4]!\n\t"
       "mov	r5, #0x01\n\t"
       "orr	r5, r5, r5, lsl #8\n\t"
       "orr	r5, r5, r5, lsl #16\n\t"
# endif

       "str	r4, [sp, #-4]!\n\t"
       "tst	r1, #4\n\t"
       "ldr	r3, [r1], #4\n\t"
       "beq	2f\n\t"
       "sub	r2, r3, "magic1(r5)"\n\t"
       "bics	r2, r2, r3\n\t"
       "tst	r2, "magic2(r5)"\n\t"
       "itt	eq\n\t"
       "streq	r3, [ip], #4\n\t"
       "ldreq	r3, [r1], #4\n"
       "bne	1f\n\t"
       /* Inner loop.  We now know that r1 is 64-bit aligned, so we
	  can safely fetch up to two words.  This allows us to avoid
	  load stalls.  */
       ".p2align 2\n"
  "2:\n\t"
       "pld	[r1, #8]\n\t"
       "ldr	r4, [r1], #4\n\t"
       "sub	r2, r3, "magic1(r5)"\n\t"
       "bics	r2, r2, r3\n\t"
       "tst	r2, "magic2(r5)"\n\t"
       "sub	r2, r4, "magic1(r5)"\n\t"
       "bne	1f\n\t"
       "str	r3, [ip], #4\n\t"
       "bics	r2, r2, r4\n\t"
       "tst	r2, "magic2(r5)"\n\t"
       "itt	eq\n\t"
       "ldreq	r3, [r1], #4\n\t"
       "streq	r4, [ip], #4\n\t"
       "beq	2b\n\t"
       "mov	r3, r4\n"
  "1:\n\t"
# ifdef __ARMEB__
       "rors	r3, r3, #24\n\t"
# endif
       "strb	r3, [ip], #1\n\t"
       "tst	r3, #0xff\n\t"
# ifdef __ARMEL__
       "ror	r3, r3, #8\n\t"
# endif
       "bne	1b\n\t"
       "ldr	r4, [sp], #4\n\t"
# ifndef __thumb2__
       "ldr	r5, [sp], #4\n\t"
# endif
       "BX LR\n"

       /* Strings have the same offset from word alignment, but it's
	  not zero.  */
  "3:\n\t"
       "tst	r1, #1\n\t"
       "beq	1f\n\t"
       "ldrb	r2, [r1], #1\n\t"
       "strb	r2, [ip], #1\n\t"
       "cmp	r2, #0\n\t"
       "it	eq\n"
       "BXEQ LR\n"
  "1:\n\t"
       "tst	r1, #2\n\t"
       "beq	5b\n\t"
       "ldrh	r2, [r1], #2\n\t"
# ifdef __ARMEB__
       "tst	r2, #0xff00\n\t"
       "iteet	ne\n\t"
       "strneh	r2, [ip], #2\n\t"
       "lsreq	r2, r2, #8\n\t"
       "streqb	r2, [ip]\n\t"
       "tstne	r2, #0xff\n\t"
# else
       "tst	r2, #0xff\n\t"
       "itet	ne\n\t"
       "strneh	r2, [ip], #2\n\t"
       "streqb	r2, [ip]\n\t"
       "tstne	r2, #0xff00\n\t"
# endif
       "bne	5b\n\t"
       "BX LR\n"

       /* src and dst do not have a common word-alignement.  Fall back to
	  byte copying.  */
  "4:\n\t"
       "ldrb	r2, [r1], #1\n\t"
       "strb	r2, [ip], #1\n\t"
       "cmp	r2, #0\n\t"
       "bne	4b\n\t"
       "BX LR");
}
/* For GLIBC: libc_hidden_builtin_def (strcpy) */

#endif /* defined (__thumb2__) && !defined (__thumb__)  */