/src/cpython/Python/perf_trampoline.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | |
3 | | Perf trampoline instrumentation |
4 | | =============================== |
5 | | |
6 | | This file contains instrumentation that makes it possible to associate |
7 | | calls to the CPython eval loop back to the names of the Python |
8 | | functions and filename being executed. |
9 | | |
10 | | Many native performance profilers like the Linux perf tools are |
11 | | only able to 'see' the C stack when sampling from the profiled |
12 | | process. This means that if we have the following python code: |
13 | | |
14 | | import time |
15 | | def foo(n): |
16 | | # Some CPU intensive code |
17 | | |
18 | | def bar(n): |
19 | | foo(n) |
20 | | |
21 | | def baz(n): |
22 | | bar(n) |
23 | | |
24 | | baz(10000000) |
25 | | |
26 | | A performance profiler that is only able to see native frames will |
27 | | produce the following backtrace when sampling from foo(): |
28 | | |
29 | | _PyEval_EvalFrameDefault -----> Evaluation frame of foo() |
30 | | _PyEval_Vector |
31 | | _PyFunction_Vectorcall |
32 | | PyObject_Vectorcall |
33 | | call_function |
34 | | |
35 | | _PyEval_EvalFrameDefault ------> Evaluation frame of bar() |
36 | | _PyEval_EvalFrame |
37 | | _PyEval_Vector |
38 | | _PyFunction_Vectorcall |
39 | | PyObject_Vectorcall |
40 | | call_function |
41 | | |
42 | | _PyEval_EvalFrameDefault -------> Evaluation frame of baz() |
43 | | _PyEval_EvalFrame |
44 | | _PyEval_Vector |
45 | | _PyFunction_Vectorcall |
46 | | PyObject_Vectorcall |
47 | | call_function |
48 | | |
49 | | ... |
50 | | |
51 | | Py_RunMain |
52 | | |
53 | | Because the profiler is only able to see the native frames and the native |
54 | | function that runs the evaluation loop is the same (_PyEval_EvalFrameDefault) |
55 | | then the profiler and any report generated by it will not be able to |
56 | | associate the names of the Python functions and the filenames associated with |
57 | | those calls, rendering the results useless in the Python world. |
58 | | |
59 | | To fix this problem, we introduce the concept of a trampoline frame. A |
60 | | trampoline frame is a piece of code that is unique per Python code object that |
61 | | is executed before entering the CPython eval loop. This piece of code just |
62 | | calls the original Python evaluation function (_PyEval_EvalFrameDefault) and |
63 | | forwards all the arguments received. In this way, when a profiler samples |
64 | | frames from the previous example it will see: |
65 | | |
66 | | _PyEval_EvalFrameDefault -----> Evaluation frame of foo() |
67 | | [Jit compiled code 3] |
68 | | _PyEval_Vector |
69 | | _PyFunction_Vectorcall |
70 | | PyObject_Vectorcall |
71 | | call_function |
72 | | |
73 | | _PyEval_EvalFrameDefault ------> Evaluation frame of bar() |
74 | | [Jit compiled code 2] |
75 | | _PyEval_EvalFrame |
76 | | _PyEval_Vector |
77 | | _PyFunction_Vectorcall |
78 | | PyObject_Vectorcall |
79 | | call_function |
80 | | |
81 | | _PyEval_EvalFrameDefault -------> Evaluation frame of baz() |
82 | | [Jit compiled code 1] |
83 | | _PyEval_EvalFrame |
84 | | _PyEval_Vector |
85 | | _PyFunction_Vectorcall |
86 | | PyObject_Vectorcall |
87 | | call_function |
88 | | |
89 | | ... |
90 | | |
91 | | Py_RunMain |
92 | | |
93 | | When we generate every unique copy of the trampoline (what here we called "[Jit |
94 | | compiled code N]") we write the relationship between the compiled code and the |
95 | | Python function that is associated with it. Every profiler requires this |
96 | | information in a different format. For example, the Linux "perf" profiler |
97 | | requires a file in "/tmp/perf-PID.map" (name and location not configurable) |
98 | | with the following format: |
99 | | |
100 | | <compiled code address> <compiled code size> <name of the compiled code> |
101 | | |
102 | | If this file is available when "perf" generates reports, it will automatically |
103 | | associate every trampoline with the Python function that it is associated with |
104 | | allowing it to generate reports that include Python information. These reports |
105 | | then can also be filtered in a way that *only* Python information appears. |
106 | | |
107 | | Notice that for this to work, there must be a unique copy of the trampoline |
108 | | per Python code object even if the code in the trampoline is the same. To |
109 | | achieve this we have an assembly template in Objects/asm_trampoline.S that is |
110 | | compiled into the Python executable/shared library. This template generates a |
111 | | symbol that maps the start of the assembly code and another that marks the end |
112 | | of the assembly code for the trampoline. Then, every time we need a unique |
113 | | trampoline for a Python code object, we copy the assembly code into a mmaped |
114 | | area that has executable permissions and we return the start of that area as |
115 | | our trampoline function. |
116 | | |
117 | | Asking for a mmap-ed memory area for trampoline is very wasteful so we |
118 | | allocate big arenas of memory in a single mmap call, we populate the entire |
119 | | arena with copies of the trampoline (this allows us to now have to invalidate |
120 | | the icache for the instructions in the page) and then we return the next |
121 | | available chunk every time someone asks for a new trampoline. We keep a linked |
122 | | list of arenas in case the current memory arena is exhausted and another one is |
123 | | needed. |
124 | | |
125 | | For the best results, Python should be compiled with |
126 | | CFLAGS="-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" as this allows |
127 | | profilers to unwind using only the frame pointer instead of relying on DWARF debug |
128 | | information (note that as trampolines are dynamically generated there won't be |
129 | | any DWARF information available for them). |
130 | | */ |
131 | | |
132 | | #include "Python.h" |
133 | | #include "pycore_ceval.h" // _PyPerf_Callbacks |
134 | | #include "pycore_interpframe.h" // _PyFrame_GetCode() |
135 | | #include "pycore_runtime.h" // _PyRuntime |
136 | | |
137 | | |
138 | | #ifdef PY_HAVE_PERF_TRAMPOLINE |
139 | | |
140 | | #include <fcntl.h> |
141 | | #include <stdio.h> |
142 | | #include <stdlib.h> |
143 | | #include <sys/mman.h> // mmap() |
144 | | #include <sys/types.h> |
145 | | #include <unistd.h> // sysconf() |
146 | | #include <sys/time.h> // gettimeofday() |
147 | | |
148 | | |
149 | | #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) |
150 | | #define PY_HAVE_INVALIDATE_ICACHE |
151 | | |
#if defined(__clang__) || defined(__GNUC__)
// Declared by hand; this code relies on GCC and Clang providing the symbol.
extern void __clear_cache(void *, void*);
#endif

/* Invalidate the instruction cache for the address range [begin, end).
 *
 * The flush is delegated to __clear_cache() when building with GCC or Clang.
 * With any other compiler this function does nothing.
 */
static void invalidate_icache(char* begin, char*end) {
#if defined(__clang__) || defined(__GNUC__)
    // Plain call: a `return <expr>;` is not allowed in a void function.
    __clear_cache(begin, end);
#else
    (void)begin;
    (void)end;
#endif
}
163 | | #endif |
164 | | |
/* The function pointer is passed as last argument. The other three arguments
 * are passed in the same order as the function requires. This results in
 * shorter, more efficient ASM code for trampoline.
 */
// Signature of a frame evaluation function such as _PyEval_EvalFrameDefault.
typedef PyObject *(*py_evaluator)(PyThreadState *, _PyInterpreterFrame *,
                                  int throwflag);
// Signature of a trampoline: the three evaluator arguments followed by the
// evaluator itself (py_trampoline_evaluator passes _PyEval_EvalFrameDefault).
typedef PyObject *(*py_trampoline)(PyThreadState *, _PyInterpreterFrame *, int,
                                   py_evaluator);

// Only the addresses of these two symbols are used in this file: they delimit
// the trampoline template that new_code_arena() copies into each arena.
extern void *_Py_trampoline_func_start;  // Start of the template of the
                                         // assembly trampoline
extern void *
    _Py_trampoline_func_end;  // End of the template of the assembly trampoline
178 | | |
/* Bookkeeping for one mmap-ed arena that holds copies of the trampoline.
 * Arenas form a singly linked list through `prev`; new_code_arena() pushes
 * each new arena at the head (perf_code_arena).
 */
struct code_arena_st {
    char *start_addr;    // Start of the memory arena
    char *current_addr;  // Address of the next trampoline to hand out within the arena
    size_t size;         // Size of the memory arena
    size_t size_left;    // Remaining size of the memory arena
    size_t code_size;    // Size of the code of every trampoline in the arena
    struct code_arena_st
        *prev;  // Pointer to the previous arena or NULL if this is the first arena.
};

typedef struct code_arena_st code_arena_t;
typedef struct trampoline_api_st trampoline_api_t;
191 | | |
/* Kind of perf backend currently recorded in the runtime's
 * perf_trampoline_type field.
 */
enum perf_trampoline_type {
    PERF_TRAMPOLINE_UNSET = 0,         // Restored by _PyPerfTrampoline_Fini()
    PERF_TRAMPOLINE_TYPE_MAP = 1,      // Set by perf_map_init_state()
    PERF_TRAMPOLINE_TYPE_JITDUMP = 2,  // Not set in this file; presumably set by a jitdump backend
};
197 | | |
// Short aliases for the perf trampoline fields stored in the global runtime
// state (_PyRuntime.ceval.perf). They expand to lvalues, so they can be both
// read and assigned.
#define perf_status _PyRuntime.ceval.perf.status
#define extra_code_index _PyRuntime.ceval.perf.extra_code_index
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
205 | | |
206 | | static void |
207 | | perf_map_write_entry(void *state, const void *code_addr, |
208 | | unsigned int code_size, PyCodeObject *co) |
209 | 0 | { |
210 | 0 | const char *entry = ""; |
211 | 0 | if (co->co_qualname != NULL) { |
212 | 0 | entry = PyUnicode_AsUTF8(co->co_qualname); |
213 | 0 | } |
214 | 0 | const char *filename = ""; |
215 | 0 | if (co->co_filename != NULL) { |
216 | 0 | filename = PyUnicode_AsUTF8(co->co_filename); |
217 | 0 | } |
218 | 0 | size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; |
219 | 0 | char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); |
220 | 0 | if (perf_map_entry == NULL) { |
221 | 0 | return; |
222 | 0 | } |
223 | 0 | snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); |
224 | 0 | PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry); |
225 | 0 | PyMem_RawFree(perf_map_entry); |
226 | 0 | } |
227 | | |
228 | | static void* |
229 | | perf_map_init_state(void) |
230 | 0 | { |
231 | 0 | PyUnstable_PerfMapState_Init(); |
232 | 0 | trampoline_api.code_padding = 0; |
233 | 0 | perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP; |
234 | 0 | return NULL; |
235 | 0 | } |
236 | | |
/* free_state callback of the perf map backend.
 *
 * `state` is ignored; the call finalizes the perf map state.
 * Always returns 0.
 */
static int
perf_map_free_state(void *state)
{
    (void)state;
    PyUnstable_PerfMapState_Fini();
    return 0;
}
243 | | |
244 | | _PyPerf_Callbacks _Py_perfmap_callbacks = { |
245 | | &perf_map_init_state, |
246 | | &perf_map_write_entry, |
247 | | &perf_map_free_state, |
248 | | }; |
249 | | |
250 | | |
/* Round `value` up to the next multiple of `multiple`.
 *
 * A value that is already a multiple is returned unchanged, and so is any
 * value when `multiple` is 0 (which would otherwise divide by zero).
 */
static size_t
round_up(int64_t value, int64_t multiple)
{
    // How far `value` sits past the previous multiple (0 when aligned).
    int64_t excess = (multiple != 0) ? (value % multiple) : 0;
    if (excess != 0) {
        value += multiple - excess;
    }
    return value;
}
271 | | |
272 | | // TRAMPOLINE MANAGEMENT API |
273 | | |
274 | | static int |
275 | | new_code_arena(void) |
276 | 0 | { |
277 | | // non-trivial programs typically need 64 to 256 kiB. |
278 | 0 | size_t mem_size = 4096 * 16; |
279 | 0 | assert(mem_size % sysconf(_SC_PAGESIZE) == 0); |
280 | 0 | char *memory = |
281 | 0 | mmap(NULL, // address |
282 | 0 | mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, |
283 | 0 | -1, // fd (not used here) |
284 | 0 | 0); // offset (not used here) |
285 | 0 | if (memory == MAP_FAILED) { |
286 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
287 | 0 | PyErr_FormatUnraisable("Failed to create new mmap for perf trampoline"); |
288 | 0 | perf_status = PERF_STATUS_FAILED; |
289 | 0 | return -1; |
290 | 0 | } |
291 | 0 | void *start = &_Py_trampoline_func_start; |
292 | 0 | void *end = &_Py_trampoline_func_end; |
293 | 0 | size_t code_size = end - start; |
294 | 0 | size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16); |
295 | | // TODO: Check the effect of alignment of the code chunks. Initial investigation |
296 | | // showed that this has no effect on performance in x86-64 or aarch64 and the current |
297 | | // version has the advantage that the unwinder in GDB can unwind across JIT-ed code. |
298 | | // |
299 | | // We should check the values in the future and see if there is a |
300 | | // measurable performance improvement by rounding trampolines up to 32-bit |
301 | | // or 64-bit alignment. |
302 | |
|
303 | 0 | size_t n_copies = mem_size / chunk_size; |
304 | 0 | for (size_t i = 0; i < n_copies; i++) { |
305 | 0 | memcpy(memory + i * chunk_size, start, code_size * sizeof(char)); |
306 | 0 | } |
307 | | // Some systems may prevent us from creating executable code on the fly. |
308 | 0 | int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC); |
309 | 0 | if (res == -1) { |
310 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
311 | 0 | munmap(memory, mem_size); |
312 | 0 | PyErr_FormatUnraisable("Failed to set mmap for perf trampoline to " |
313 | 0 | "PROT_READ | PROT_EXEC"); |
314 | 0 | return -1; |
315 | 0 | } |
316 | | |
317 | | #ifdef PY_HAVE_INVALIDATE_ICACHE |
318 | | // Before the JIT can run a block of code that has been emitted it must invalidate |
319 | | // the instruction cache on some platforms like arm and aarch64. |
320 | | invalidate_icache(memory, memory + mem_size); |
321 | | #endif |
322 | | |
323 | 0 | code_arena_t *new_arena = PyMem_RawCalloc(1, sizeof(code_arena_t)); |
324 | 0 | if (new_arena == NULL) { |
325 | 0 | PyErr_NoMemory(); |
326 | 0 | munmap(memory, mem_size); |
327 | 0 | PyErr_FormatUnraisable("Failed to allocate new code arena struct for perf trampoline"); |
328 | 0 | return -1; |
329 | 0 | } |
330 | | |
331 | 0 | new_arena->start_addr = memory; |
332 | 0 | new_arena->current_addr = memory; |
333 | 0 | new_arena->size = mem_size; |
334 | 0 | new_arena->size_left = mem_size; |
335 | 0 | new_arena->code_size = code_size; |
336 | 0 | new_arena->prev = perf_code_arena; |
337 | 0 | perf_code_arena = new_arena; |
338 | 0 | return 0; |
339 | 0 | } |
340 | | |
341 | | static void |
342 | | free_code_arenas(void) |
343 | 0 | { |
344 | 0 | code_arena_t *cur = perf_code_arena; |
345 | 0 | code_arena_t *prev; |
346 | 0 | perf_code_arena = NULL; // invalid static pointer |
347 | 0 | while (cur) { |
348 | 0 | munmap(cur->start_addr, cur->size); |
349 | 0 | prev = cur->prev; |
350 | 0 | PyMem_RawFree(cur); |
351 | 0 | cur = prev; |
352 | 0 | } |
353 | 0 | } |
354 | | |
355 | | static inline py_trampoline |
356 | | code_arena_new_code(code_arena_t *code_arena) |
357 | 0 | { |
358 | 0 | py_trampoline trampoline = (py_trampoline)code_arena->current_addr; |
359 | 0 | size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16); |
360 | 0 | code_arena->size_left -= total_code_size; |
361 | 0 | code_arena->current_addr += total_code_size; |
362 | 0 | return trampoline; |
363 | 0 | } |
364 | | |
365 | | static inline py_trampoline |
366 | | compile_trampoline(void) |
367 | 0 | { |
368 | 0 | size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16); |
369 | 0 | if ((perf_code_arena == NULL) || |
370 | 0 | (perf_code_arena->size_left <= total_code_size)) { |
371 | 0 | if (new_code_arena() < 0) { |
372 | 0 | return NULL; |
373 | 0 | } |
374 | 0 | } |
375 | 0 | assert(perf_code_arena->size_left <= perf_code_arena->size); |
376 | 0 | return code_arena_new_code(perf_code_arena); |
377 | 0 | } |
378 | | |
379 | | static PyObject * |
380 | | py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame, |
381 | | int throw) |
382 | 0 | { |
383 | 0 | if (perf_status == PERF_STATUS_FAILED || |
384 | 0 | perf_status == PERF_STATUS_NO_INIT) { |
385 | 0 | goto default_eval; |
386 | 0 | } |
387 | 0 | PyCodeObject *co = _PyFrame_GetCode(frame); |
388 | 0 | py_trampoline f = NULL; |
389 | 0 | assert(extra_code_index != -1); |
390 | 0 | int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); |
391 | 0 | if (ret != 0 || f == NULL) { |
392 | | // This is the first time we see this code object so we need |
393 | | // to compile a trampoline for it. |
394 | 0 | py_trampoline new_trampoline = compile_trampoline(); |
395 | 0 | if (new_trampoline == NULL) { |
396 | 0 | goto default_eval; |
397 | 0 | } |
398 | 0 | trampoline_api.write_state(trampoline_api.state, new_trampoline, |
399 | 0 | perf_code_arena->code_size, co); |
400 | 0 | _PyCode_SetExtra((PyObject *)co, extra_code_index, |
401 | 0 | (void *)new_trampoline); |
402 | 0 | f = new_trampoline; |
403 | 0 | } |
404 | 0 | assert(f != NULL); |
405 | 0 | return f(ts, frame, throw, _PyEval_EvalFrameDefault); |
406 | 0 | default_eval: |
407 | | // Something failed, fall back to the default evaluator. |
408 | 0 | return _PyEval_EvalFrameDefault(ts, frame, throw); |
409 | 0 | } |
410 | | #endif // PY_HAVE_PERF_TRAMPOLINE |
411 | | |
412 | | int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) |
413 | 0 | { |
414 | 0 | #ifdef PY_HAVE_PERF_TRAMPOLINE |
415 | 0 | py_trampoline f = NULL; |
416 | 0 | assert(extra_code_index != -1); |
417 | 0 | int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); |
418 | 0 | if (ret != 0 || f == NULL) { |
419 | 0 | py_trampoline new_trampoline = compile_trampoline(); |
420 | 0 | if (new_trampoline == NULL) { |
421 | 0 | return 0; |
422 | 0 | } |
423 | 0 | trampoline_api.write_state(trampoline_api.state, new_trampoline, |
424 | 0 | perf_code_arena->code_size, co); |
425 | 0 | return _PyCode_SetExtra((PyObject *)co, extra_code_index, |
426 | 0 | (void *)new_trampoline); |
427 | 0 | } |
428 | 0 | #endif // PY_HAVE_PERF_TRAMPOLINE |
429 | 0 | return 0; |
430 | 0 | } |
431 | | |
/* Return 1 if the current interpreter's frame evaluator is the perf
 * trampoline evaluator, 0 otherwise. Always 0 on builds without
 * PY_HAVE_PERF_TRAMPOLINE.
 */
int
_PyIsPerfTrampolineActive(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    PyThreadState *ts = _PyThreadState_GET();
    return ts->interp->eval_frame == py_trampoline_evaluator;
#else
    return 0;
#endif
}
441 | | |
442 | | void |
443 | | _PyPerfTrampoline_GetCallbacks(_PyPerf_Callbacks *callbacks) |
444 | 0 | { |
445 | 0 | if (callbacks == NULL) { |
446 | 0 | return; |
447 | 0 | } |
448 | 0 | #ifdef PY_HAVE_PERF_TRAMPOLINE |
449 | 0 | callbacks->init_state = trampoline_api.init_state; |
450 | 0 | callbacks->write_state = trampoline_api.write_state; |
451 | 0 | callbacks->free_state = trampoline_api.free_state; |
452 | 0 | #endif |
453 | 0 | return; |
454 | 0 | } |
455 | | |
456 | | int |
457 | | _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks) |
458 | 0 | { |
459 | 0 | if (callbacks == NULL) { |
460 | 0 | return -1; |
461 | 0 | } |
462 | 0 | #ifdef PY_HAVE_PERF_TRAMPOLINE |
463 | 0 | if (trampoline_api.state) { |
464 | 0 | _PyPerfTrampoline_Fini(); |
465 | 0 | } |
466 | 0 | trampoline_api.init_state = callbacks->init_state; |
467 | 0 | trampoline_api.write_state = callbacks->write_state; |
468 | 0 | trampoline_api.free_state = callbacks->free_state; |
469 | 0 | trampoline_api.state = NULL; |
470 | 0 | #endif |
471 | 0 | return 0; |
472 | 0 | } |
473 | | |
/* Activate (`activate` != 0) or deactivate (`activate` == 0) the perf
 * trampoline for the current interpreter.
 *
 * Fails with RuntimeError and returns -1 if a frame evaluator other than
 * the trampoline evaluator is already installed. Deactivation restores the
 * default evaluator and marks the trampoline as not initialized.
 * Activation installs the trampoline evaluator, allocates a first arena,
 * requests a code extra index and initializes the backend state if it has
 * not been initialized yet. Returns 0 on success and -1 on failure.
 * Does nothing and returns 0 on builds without PY_HAVE_PERF_TRAMPOLINE.
 *
 * NOTE(review): on the two activation failure paths the trampoline
 * evaluator stays installed. new_code_arena() also reads
 * trampoline_api.code_padding before init_state() has run. Confirm both
 * are intended.
 */
int
_PyPerfTrampoline_Init(int activate)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate->interp->eval_frame &&
        tstate->interp->eval_frame != py_trampoline_evaluator) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Trampoline cannot be initialized as a custom eval "
                        "frame is already present");
        return -1;
    }

    if (!activate) {
        // Deactivation: back to the default evaluator.
        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
        perf_status = PERF_STATUS_NO_INIT;
        return 0;
    }

    _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator);
    if (new_code_arena() < 0) {
        return -1;
    }
    extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
    if (extra_code_index == -1) {
        return -1;
    }
    // Initialize the backend state only once.
    if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
        trampoline_api.state = trampoline_api.init_state();
    }
    perf_status = PERF_STATUS_OK;
#endif
    return 0;
}
507 | | |
/* Finalize the perf trampoline.
 *
 * Does nothing unless the trampoline is in the PERF_STATUS_OK state.
 * Otherwise it restores the default frame evaluator (if the trampoline
 * evaluator is the one installed), releases the backend state through the
 * free_state callback and resets the bookkeeping: backend state, trampoline
 * type, code extra index and status.
 *
 * The trampoline arenas are not released here; that is done by
 * _PyPerfTrampoline_FreeArenas(). Always returns 0.
 */
int
_PyPerfTrampoline_Fini(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    if (perf_status != PERF_STATUS_OK) {
        return 0;
    }
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate->interp->eval_frame == py_trampoline_evaluator) {
        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
    }
    // Callbacks come from _PyPerfTrampoline_SetCallbacks() and may be NULL.
    if (trampoline_api.free_state != NULL) {
        trampoline_api.free_state(trampoline_api.state);
    }
    // Drop the released state so that a later _PyPerfTrampoline_Init()
    // calls init_state() again instead of reusing a stale pointer.
    trampoline_api.state = NULL;
    perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
    extra_code_index = -1;
    perf_status = PERF_STATUS_NO_INIT;
#endif
    return 0;
}
528 | | |
/* Release every trampoline arena. No-op on builds without
 * PY_HAVE_PERF_TRAMPOLINE.
 */
void
_PyPerfTrampoline_FreeArenas(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    free_code_arenas();
#endif
}
535 | | |
/* Store `enable` in the runtime's persist_after_fork setting.
 *
 * Returns the stored value, or 0 on builds without
 * PY_HAVE_PERF_TRAMPOLINE.
 */
int
PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    persist_after_fork = enable;
    return persist_after_fork;
#else
    (void)enable;
    return 0;
#endif
}
544 | | |
545 | | PyStatus |
546 | | _PyPerfTrampoline_AfterFork_Child(void) |
547 | 0 | { |
548 | 0 | #ifdef PY_HAVE_PERF_TRAMPOLINE |
549 | 0 | if (persist_after_fork) { |
550 | 0 | if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) { |
551 | 0 | return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map."); |
552 | 0 | } |
553 | 0 | _PyPerfTrampoline_Fini(); |
554 | 0 | char filename[256]; |
555 | 0 | pid_t parent_pid = getppid(); |
556 | 0 | snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid); |
557 | 0 | if (PyUnstable_CopyPerfMapFile(filename) != 0) { |
558 | 0 | return PyStatus_Error("Failed to copy perf map file."); |
559 | 0 | } |
560 | 0 | } else { |
561 | | // Restart trampoline in file in child. |
562 | 0 | int was_active = _PyIsPerfTrampolineActive(); |
563 | 0 | _PyPerfTrampoline_Fini(); |
564 | 0 | if (was_active) { |
565 | 0 | _PyPerfTrampoline_Init(1); |
566 | 0 | } |
567 | 0 | } |
568 | 0 | #endif |
569 | 0 | return PyStatus_Ok(); |
570 | 0 | } |