/src/kamailio/src/core/fastlock.h
/*
 * fast architecture specific locking
 *
 * Copyright (C) 2001-2003 FhG Fokus
 *
 * Copyright (C) 2017 Core Network Dynamics for ARM8 (aarch64) support
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*!
 * \file
 * \brief Kamailio core :: fast architecture specific locking
 * \author andrei
 * \ingroup core
 * Module: \ref core
 * WARNING: the code was not tested on the following architectures:
 * - alpha (cross-compiles ok, no test)
 * - mips64 (cross-compiles ok)
 * - ppc64 (compiles ok)
 * - sparc32 (tested on a sparc64)
 */


#ifndef fastlock_h
#define fastlock_h

#include "sched_yield.h"


#define SPIN_OPTIMIZE /* if defined, optimize spinning on the lock:
						 first try the lock with non-atomic/non-memory-locking
						 operations, and only if the lock appears to be free
						 switch to the more expensive version */
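
/* Illustrative sketch (a minimal portable form, assuming C11 <stdatomic.h>
 * is available; the header itself uses per-architecture inline asm instead):
 * the "test-and-test-and-set" idea behind SPIN_OPTIMIZE.
 *
 *   #include <stdatomic.h>
 *
 *   static void ttas_lock(atomic_int *l)
 *   {
 *       // cheap relaxed load first; attempt the expensive atomic
 *       // exchange only once the lock appears to be free
 *       while(atomic_load_explicit(l, memory_order_relaxed) != 0
 *               || atomic_exchange_explicit(l, 1, memory_order_acquire) != 0)
 *           ; // spin
 *   }
 */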

typedef volatile int fl_lock_t;


#define init_lock(l) (l) = 0


/* which membar to use (if any) after taking a lock. This was separated
 * from the lock code to allow better optimizations,
 * e.g.: use membar_getlock() only after getting the lock and don't use
 * it if lock_get fails / when spinning on tsl.
 * There is no corresponding membar_release_lock (because lock_release
 * must always include the needed memory barrier).
 * WARNING: this is intended only for internal fastlock use */
#if defined(__CPU_i386) || defined(__CPU_x86_64)
#define membar_getlock() /* not needed on x86 */

#elif defined(__CPU_sparc64)
#ifndef NOSMP
#define membar_getlock() \
	asm volatile("membar #StoreStore | #StoreLoad \n\t" : : : "memory")
/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
 * since ldstub acts both as a store and as a load */
#else
/* no need for a compiler barrier, that is already included in lock_get/tsl */
#define membar_getlock() /* not needed if no smp */
#endif /* NOSMP */

#elif defined(__CPU_sparc)
#define membar_getlock() /* no need for a compiler barrier, already included */

#elif defined __CPU_arm
#ifndef NOSMP
#warning smp not supported on arm < 6 (no membars), try compiling with -DNOSMP
#endif /* NOSMP */
#define membar_getlock()

#elif defined __CPU_arm6
#ifndef NOSMP
#define membar_getlock() \
	asm volatile("mcr p15, 0, %0, c7, c10, 5" : : "r"(0) : "memory")
#else /* NOSMP */
#define membar_getlock()
#endif /* NOSMP */

#elif defined __CPU_arm7
#ifndef NOSMP
#define membar_getlock() asm volatile("dmb" : : : "memory")
#else /* NOSMP */
#define membar_getlock()
#endif /* NOSMP */

#elif defined(__CPU_aarch64)
#ifndef NOSMP
#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
#endif /* NOSMP */
#define membar_getlock()

#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
#ifndef NOSMP
#define membar_getlock() asm volatile("lwsync \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */

#elif defined __CPU_mips2 || defined __CPU_mips64
#ifndef NOSMP
#define membar_getlock() asm volatile("sync \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */

#elif defined __CPU_mips
#ifndef NOSMP
#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
#endif
#define membar_getlock()

#elif defined __CPU_alpha
#ifndef NOSMP
#define membar_getlock() asm volatile("mb \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */

#else /* __CPU_xxx */
#error "unknown architecture"
#endif
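
/* Intended call pattern for membar_getlock() (a sketch mirroring get_lock()
 * below): the barrier runs only after the lock is actually acquired, never
 * on the failing/spinning path.
 *
 *   while(tsl(lock))
 *       sched_yield();  // spinning: no barrier needed on failure
 *   membar_getlock();   // order the critical section after the acquire
 */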


/* test and set lock; returns !=0 if the lock is held by someone else,
 * 0 otherwise (lock acquired).
 * WARNING: no memory barriers are included; if you use this function
 * directly (not recommended) and it gets the lock (ret==0), you should
 * call membar_getlock() after it */
inline static int tsl(fl_lock_t *lock)
{
	int val;

#if defined(__CPU_i386) || defined(__CPU_x86_64)

#ifdef NOSMP
	asm volatile(" xor %0, %0 \n\t"
				 " btsl $0, %2 \n\t"
				 " setc %b0 \n\t"
				 : "=&q"(val), "=m"(*lock)
				 : "m"(*lock)
				 : "memory", "cc");
#else
	asm volatile(
#ifdef SPIN_OPTIMIZE
			" cmpb $0, %2 \n\t"
			" mov $1, %0 \n\t"
			" jnz 1f \n\t"
#else
			" mov $1, %0 \n\t"
#endif
			" xchgb %2, %b0 \n\t"
			"1: \n\t"
			: "=&q"(val), "=m"(*lock)
			: "m"(*lock)
			: "memory"
#ifdef SPIN_OPTIMIZE
			,
			"cc"
#endif
	);
#endif /* NOSMP */
#elif defined(__CPU_sparc64)
	asm volatile(
#ifdef SPIN_OPTIMIZE
			" ldub [%2], %0 \n\t"
			" brnz,a,pn %0, 1f \n\t"
			" nop \n\t"
#endif
			" ldstub [%2], %0 \n\t"
			"1: \n\t"
			/* membar_getlock must be called outside this function */
			: "=&r"(val), "=m"(*lock)
			: "r"(lock)
			: "memory");
#elif defined(__CPU_sparc)
	asm volatile(
#ifdef SPIN_OPTIMIZE
			" ldub [%2], %0 \n\t"
			" tst %0 \n\t"
			" bne,a 1f \n\t"
			" nop \n\t"
#endif
			" ldstub [%2], %0 \n\t"
			"1: \n\t"
			/* membar_getlock must be called outside this function */
			: "=&r"(val), "=m"(*lock)
			: "r"(lock)
			: "memory"
#ifdef SPIN_OPTIMIZE
			,
			"cc"
#endif
	);
#elif defined __CPU_arm
	asm volatile("swp %0, %2, [%3] \n\t"
				 : "=&r"(val), "=m"(*lock)
				 : "r"(1), "r"(lock)
				 : "memory");
#elif defined __CPU_arm6 || defined __CPU_arm7
	asm volatile(" ldrex %0, [%2] \n\t"
				 " cmp %0, #0 \n\t"
				 " strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
				 /* if %0!=0 => either it was 1 initially or it was 0
				  * and somebody changed it just before the strexeq (so the
				  * lock is taken) => it's safe to return %0 */
				 /* membar_getlock must be called outside this function */
				 : "=&r"(val), "=m"(*lock)
				 : "r"(lock), "r"(1)
				 : "cc");

#elif defined __CPU_aarch64
	int res = 0;
	int one = 1;
	asm volatile("1: ldaxr %w0, %2 \n\t"
				 " stlxr %w1, %w3, %2 \n\t"
				 " cbnz %w1, 1b \n\t"
				 : "=&r"(val), "=&r"(res), "+Q"(*lock)
				 : "r"(one)
				 : "cc", "memory");


#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
	asm volatile("1: \n\t"
#ifdef SPIN_OPTIMIZE
				 " lwzx %0, 0, %2 \n\t"
				 " cmpwi %0, 0 \n\t"
				 " bne- 2f \n\t" /* predict: not taken */
#endif
				 " lwarx %0, 0, %2\n\t"
				 " cmpwi %0, 0\n\t"
				 " bne- 2f\n\t"
				 " stwcx. %3, 0, %2\n\t"
				 " bne- 1b\n\t"
				 /* membar_getlock must be called outside this function */
				 "2:\n\t"
				 : "=&r"(val), "=m"(*lock)
				 : "r"(lock), "r"(1)
				 : "memory", "cc");
#elif defined __CPU_mips2 || (defined __CPU_mips && defined MIPS_HAS_LLSC) \
		|| defined __CPU_mips64
	long tmp;

	asm volatile(".set push \n\t"
				 ".set noreorder\n\t"
				 ".set mips2 \n\t"
#ifdef SPIN_OPTIMIZE
				 " lw %1, %2 \n\t"
				 " bne %1, $0, 2f \n\t"
				 " nop \n\t"
#endif
				 "1: ll %1, %2 \n\t"
				 " bne %1, $0, 2f \n\t"
				 " li %0, 1 \n\t" /* delay slot */
				 " sc %0, %2 \n\t"
				 " beqz %0, 1b \n\t"
				 " nop \n\t"
				 "2: \n\t"
				 /* membar_getlock must be called outside this function */
				 ".set pop\n\t"
				 : "=&r"(tmp), "=&r"(val), "=m"(*lock)
				 : "m"(*lock)
				 : "memory");
#elif defined __CPU_alpha
	long tmp;
	tmp = 0;
	/* lock low bit set to 1 when the lock is held and to 0 otherwise */
	asm volatile(
			"1: ldl %0, %1 \n\t"
			" blbs %0, 2f \n\t" /* optimization if locked */
			" ldl_l %0, %1 \n\t"
			" blbs %0, 2f \n\t"
			" lda %2, 1 \n\t" /* or: or $31, 1, %2 ??? */
			" stl_c %2, %1 \n\t"
			" beq %2, 3f \n\t" /* back cond. jumps are always predicted to be
								  taken => make forward jump */
			/* membar_getlock must be called outside this function */
			"2: \n\t"
			".subsection 2 \n\t"
			"3: br 1b \n\t"
			".previous \n\t"
			: "=&r"(val), "=m"(*lock), "=&r"(tmp)
			: "m"(*lock)
			: "memory");
#else
#error "unknown architecture"
#endif
	return val;
}
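
/* Rough portable equivalent of tsl() (a sketch assuming C11 atomics; the
 * real implementation above is per-architecture inline asm and leaves the
 * acquire barrier to the caller via membar_getlock()):
 *
 *   #include <stdatomic.h>
 *
 *   static inline int tsl_c11(atomic_int *lock)
 *   {
 *       // returns the previous value: non-zero if already held,
 *       // 0 if this call acquired the lock; ordering is relaxed
 *       // because issuing the acquire barrier is the caller's job
 *       return atomic_exchange_explicit(lock, 1, memory_order_relaxed);
 *   }
 */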


inline static void get_lock(fl_lock_t *lock)
{
#ifdef ADAPTIVE_WAIT
	int i = ADAPTIVE_WAIT_LOOPS;
#endif

	while(tsl(lock)) {
#ifdef BUSY_WAIT
#elif defined ADAPTIVE_WAIT
		if(i > 0)
			i--;
		else
			sched_yield();
#else
		sched_yield();
#endif
	}
	membar_getlock();
}
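
/* Usage sketch (counter_lock, shared_counter and counter_inc are
 * hypothetical names, not part of this header): the critical-section
 * pattern these primitives are meant for.
 *
 *   static fl_lock_t counter_lock; // init_lock(counter_lock) at startup
 *   static int shared_counter;
 *
 *   static void counter_inc(void)
 *   {
 *       get_lock(&counter_lock);
 *       shared_counter++; // protected by counter_lock
 *       release_lock(&counter_lock);
 *   }
 */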


/* like get_lock(), but it doesn't wait: returns 0 if it gets the lock,
 * <0 (-1) otherwise */
inline static int try_lock(fl_lock_t *lock)
{
	if(tsl(lock)) {
		return -1;
	}
	membar_getlock();
	return 0;
}
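
/* Usage sketch (do_other_work() is a hypothetical name): a non-blocking
 * attempt that falls back to other work instead of spinning.
 *
 *   if(try_lock(&counter_lock) == 0) {
 *       shared_counter++;
 *       release_lock(&counter_lock);
 *   } else {
 *       do_other_work(); // lock busy: don't wait
 *   }
 */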


inline static void release_lock(fl_lock_t *lock)
{
#if defined(__CPU_i386)
#ifdef NOSMP
	asm volatile(" movb $0, %0 \n\t" : "=m"(*lock) : : "memory");
#else /* ! NOSMP */
	int val;
	/* a simple mov $0, (lock) does not force StoreStore ordering on all
	   x86 versions and it doesn't seem to force LoadStore either */
	asm volatile(" xchgb %b0, %1 \n\t"
				 : "=q"(val), "=m"(*lock)
				 : "0"(0)
				 : "memory");
#endif /* NOSMP */
#elif defined(__CPU_x86_64)
	asm volatile(
			" movb $0, %0 \n\t" /* on amd64 membar StoreStore | LoadStore is
								   implicit (at least on the same mem. type) */
			: "=m"(*lock)
			:
			: "memory");
#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
	asm volatile(
#ifndef NOSMP
#ifdef __CPU_sparc64
			"membar #LoadStore | #StoreStore \n\t"
#else /* __CPU_sparc */
			"stbar \n\t"
#endif /* __CPU_sparc64 */
#endif
			"stb %%g0, [%1] \n\t"
			: "=m"(*lock)
			: "r"(lock)
			: "memory");
#elif defined __CPU_arm || defined __CPU_arm6 || defined __CPU_arm7
#if !defined NOSMP && defined __CPU_arm
#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
#endif
	/* (mis)use membar_getlock() as the pre-release barrier */
	membar_getlock();
	asm volatile(" str %1, [%2] \n\r"
				 : "=m"(*lock)
				 : "r"(0), "r"(lock)
				 : "cc", "memory");
#ifdef __CPU_arm6
	/* drain store buffer: drain the per processor buffer into the L1 cache,
	   making all the changes visible to other processors */
	asm volatile("mcr p15, 0, %0, c7, c10, 4 \n\r" /* DSB equiv. on arm6 */
				 :
				 : "r"(0)
				 : "memory");
#elif defined __CPU_arm7
	/* drain store buffer: drain the per processor buffer into the L1 cache,
	   making all the changes visible to other processors */
	asm volatile("dsb \n\r" : : : "memory");
#endif /* __CPU_arm6 / __CPU_arm7 */

#elif defined __CPU_aarch64
#ifndef NOSMP
#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
#endif
	asm volatile(" stlr %w1, %0 \n\t" : "=Q"(*lock) : "r"(0) : "memory");


#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
	asm volatile(
			/* "sync\n\t" lwsync is faster and will work
			 * here too
			 * [IBM Programming Environments Manual, D.4.2.2]
			 */
			"lwsync\n\t"
			"stwx %1, 0, %2\n\t"
			: "=m"(*lock)
			: "r"(0), "r"(lock)
			: "memory");
#elif defined __CPU_mips2 || (defined __CPU_mips && defined MIPS_HAS_LLSC) \
		|| defined __CPU_mips64
	asm volatile(".set push \n\t"
				 ".set noreorder \n\t"
				 ".set mips2 \n\t"
#ifndef NOSMP
#ifdef __CPU_mips
#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
#else
				 " sync \n\t"
#endif
#endif
				 " sw $0, %0 \n\t"
				 ".set pop \n\t"
				 : "=m"(*lock)
				 : /* no input */
				 : "memory");
#elif defined __CPU_alpha
	asm volatile(
#ifndef NOSMP
			" mb \n\t"
#endif
			" stl $31, %0 \n\t"
			: "=m"(*lock)
			: /* no input */
			: "memory" /* because of the mb */
	);
#else
#error "unknown architecture"
#endif
}
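
/* Rough portable equivalent of release_lock() (a sketch assuming C11
 * atomics): every variant above amounts to a store with release semantics,
 * which is why no separate membar_release_lock() macro exists.
 *
 *   #include <stdatomic.h>
 *
 *   static inline void release_lock_c11(atomic_int *lock)
 *   {
 *       atomic_store_explicit(lock, 0, memory_order_release);
 *   }
 */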


#endif