/src/cpython/Python/jit_unwind.c
Line | Count | Source |
1 | | /* |
2 | | * Python JIT - DWARF .eh_frame builder |
3 | | * |
4 | | * This file contains the DWARF CFI generator used to build .eh_frame |
5 | | * data for JIT code (perf jitdump and other unwinders). |
6 | | */ |
7 | | |
8 | | #include "Python.h" |
9 | | #include "pycore_jit_unwind.h" |
10 | | #include "pycore_lock.h" |
11 | | |
12 | | #if defined(PY_HAVE_JIT_GDB_UNWIND) |
13 | | # include "jit_unwind_info.h" |
14 | | # if !JIT_UNWIND_INFO_SUPPORTED |
15 | | # error "JIT unwind info was not generated for this target" |
16 | | # endif |
17 | | #endif |
18 | | |
19 | | #if defined(PY_HAVE_PERF_TRAMPOLINE) \ |
20 | | || defined(PY_HAVE_JIT_GDB_UNWIND) \ |
21 | | || defined(PY_HAVE_JIT_GNU_BACKTRACE_UNWIND) |
22 | | |
23 | | #if defined(PY_HAVE_JIT_GDB_UNWIND) |
24 | | # include <elf.h> |
25 | | #endif |
26 | | #if defined(PY_HAVE_JIT_GNU_BACKTRACE_UNWIND) |
27 | | /* |
28 | | * libgcc exposes frame registration entry points, but GCC's public headers |
29 | | * on some distributions do not declare them even though the symbols are |
30 | | * available in libgcc_s. |
31 | | */ |
32 | | void __register_frame(const void *); |
33 | | void __deregister_frame(const void *); |
34 | | #endif |
35 | | #include <stdio.h> |
36 | | #include <string.h> |
37 | | |
38 | | // ============================================================================= |
39 | | // DWARF CONSTANTS |
40 | | // ============================================================================= |
41 | | |
42 | | /* |
43 | | * DWARF (Debug With Arbitrary Record Formats) constants |
44 | | * |
45 | | * DWARF is a debugging data format used to provide stack unwinding information. |
46 | | * These constants define the various encoding types and opcodes used in |
47 | | * DWARF Call Frame Information (CFI) records. |
48 | | */ |
49 | | |
50 | | /* Version byte stored in each CIE emitted below (via DWRF_U8(DWRF_CIE_VERSION)) */ |
51 | | #define DWRF_CIE_VERSION 1 |
52 | | |
53 | | /* DWARF call frame instruction opcodes (DW_CFA_*; CFA = Canonical Frame Address) */ |
54 | | enum { |
55 | | DWRF_CFA_nop = 0x0, // No operation (also the padding byte written by DWRF_ALIGNNOP) |
56 | | DWRF_CFA_offset_extended = 0x5, // Extended offset instruction |
57 | | DWRF_CFA_def_cfa = 0xc, // Define CFA rule: followed by register and offset operands |
58 | | DWRF_CFA_def_cfa_register = 0xd, // Define CFA register: followed by a register operand only |
59 | | DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset: followed by an offset operand only |
60 | | DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset |
61 | | DWRF_CFA_advance_loc = 0x40, // Advance location counter; the delta is OR-ed into the opcode byte |
62 | | DWRF_CFA_offset = 0x80, // Register saved at a factored CFA offset; the register is OR-ed into the opcode byte |
63 | | #if defined(__aarch64__) |
64 | | DWRF_CFA_AARCH64_negate_ra_state = 0x2d, // Toggle return address signing state |
65 | | #endif |
66 | | DWRF_CFA_restore = 0xc0 // Restore register rule; the register is OR-ed into the opcode byte, no operand follows |
67 | | }; |
68 | | |
69 | | /* |
70 | | * Architecture-specific DWARF register numbers |
71 | | * |
72 | | * The enumerator values are the register numbers written into the CFI |
73 | | * emitted below. x86_64 relies on implicit enumerator values (0..16); |
74 | | * AArch64 assigns them explicitly. Any other target stops the build. |
75 | | */ |
76 | | enum { |
77 | | #ifdef __x86_64__ |
78 | | /* x86_64 register numbering (ABI order; note that RDX comes before RCX) */ |
79 | | DWRF_REG_AX, // RAX = 0 |
80 | | DWRF_REG_DX, // RDX = 1 |
81 | | DWRF_REG_CX, // RCX = 2 |
82 | | DWRF_REG_BX, // RBX = 3 |
83 | | DWRF_REG_SI, // RSI = 4 |
84 | | DWRF_REG_DI, // RDI = 5 |
85 | | DWRF_REG_BP, // RBP = 6 |
86 | | DWRF_REG_SP, // RSP = 7 |
87 | | DWRF_REG_8, // R8 = 8 |
88 | | DWRF_REG_9, // R9 = 9 |
89 | | DWRF_REG_10, // R10 = 10 |
90 | | DWRF_REG_11, // R11 = 11 |
91 | | DWRF_REG_12, // R12 = 12 |
92 | | DWRF_REG_13, // R13 = 13 |
93 | | DWRF_REG_14, // R14 = 14 |
94 | | DWRF_REG_15, // R15 = 15 |
95 | | DWRF_REG_RA, // Return address column (RIP) = 16 |
96 | | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) |
97 | | /* AArch64 register numbering (little-endian LP64 only) */ |
98 | | DWRF_REG_FP = 29, // Frame pointer (x29) |
99 | | DWRF_REG_RA = 30, // Link register (x30, return address) |
100 | | DWRF_REG_SP = 31, // Stack pointer |
101 | | #else |
102 | | # error "Unsupported target architecture" |
103 | | #endif |
104 | | }; |
105 | | |
106 | | // ============================================================================= |
107 | | // ELF OBJECT CONTEXT |
108 | | // ============================================================================= |
109 | | |
110 | | /* |
111 | | * Context for building ELF/DWARF structures |
112 | | * |
113 | | * Write cursor plus landmarks for the buffer being filled. No capacity is |
114 | | * stored here and the append helpers do not bounds-check, so the caller |
115 | | * must supply a buffer large enough for everything that is emitted. |
116 | | */ |
117 | | typedef struct ELFObjectContext { |
118 | | uint8_t* p; // Current write position in buffer |
119 | | uint8_t* startp; // Start of buffer (for offset calculations) |
120 | | uint8_t* fde_p; // Location of the FDE PC-begin field; set and patched by elf_init_ehframe_perf |
121 | | uintptr_t code_addr; // Start address of the code region (written by the absolute-address FDE) |
122 | | size_t code_size; // Size of the code region in bytes |
123 | | } ELFObjectContext; |
124 | | |
125 | | // ============================================================================= |
126 | | // DWARF GENERATION UTILITIES |
127 | | // ============================================================================= |
128 | | |
129 | | /* |
130 | | * Copy a null-terminated string, terminator included, to ctx->p. |
131 | | * No bounds check is performed; the caller must guarantee enough room. |
132 | | * Args: |
133 | | * ctx: ELF object context; ctx->p is advanced past the copied terminator |
134 | | * str: String to append (must be null-terminated) |
135 | | * |
136 | | * Returns: Offset of the first copied byte, measured from ctx->startp |
137 | | */ |
138 | 0 | static uint32_t elfctx_append_string(ELFObjectContext* ctx, const char* str) { |
139 | 0 | uint8_t* p = ctx->p; |
140 | 0 | uint32_t ofs = (uint32_t)(p - ctx->startp); |
141 | | |
142 | | /* do/while: the terminating 0 byte is stored before the loop condition ends the copy */ |
143 | 0 | do { |
144 | 0 | *p++ = (uint8_t)*str; |
145 | 0 | } while (*str++); |
146 |

147 | 0 | ctx->p = p; |
148 | 0 | return ofs; |
149 | 0 | } |
150 | | |
151 | | /* |
152 | | * Append a SLEB128 (Signed Little Endian Base 128) value |
153 | | * |
154 | | * Emits 7 payload bits per byte, least significant group first, until the |
155 | | * remaining value fits in one signed 7-bit group (-64..63). No bounds check. |
156 | | * |
157 | | * Args: |
158 | | * ctx: ELF object context; ctx->p is advanced past the encoded bytes |
159 | | * v: Signed value to encode |
160 | | */ |
161 | 0 | static void elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) { |
162 | 0 | uint8_t* p = ctx->p; |
163 | | /* The 0x40 bias is added in unsigned arithmetic so v near INT32_MAX cannot overflow a signed add */ |
164 | | /* Encode 7 bits at a time, with continuation bit in MSB; relies on arithmetic >> for negative v */ |
165 | 0 | for (; ((uint32_t)v + 0x40u) >= 0x80u; v >>= 7) { |
166 | 0 | *p++ = (uint8_t)((v & 0x7f) | 0x80); // Set continuation bit |
167 | 0 | } |
168 | 0 | *p++ = (uint8_t)(v & 0x7f); // Final byte without continuation bit |
169 |

170 | 0 | ctx->p = p; |
171 | 0 | } |
172 | | |
173 | | /* |
174 | | * Append a ULEB128 (Unsigned Little Endian Base 128) value |
175 | | * |
176 | | * Emits 7 payload bits per byte, least significant group first. No bounds check. |
177 | | * |
178 | | * Args: |
179 | | * ctx: ELF object context; ctx->p is advanced past the encoded bytes |
180 | | * v: Unsigned value to encode |
181 | | */ |
182 | 0 | static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { |
183 | 0 | uint8_t* p = ctx->p; |
184 | | /* Bytes are cast to uint8_t (the buffer's element type), not char, so values >= 0x80 never pass through a signed type */ |
185 | | /* Encode 7 bits at a time, with continuation bit in MSB */ |
186 | 0 | for (; v >= 0x80; v >>= 7) { |
187 | 0 | *p++ = (uint8_t)((v & 0x7f) | 0x80); // Set continuation bit |
188 | 0 | } |
189 | 0 | *p++ = (uint8_t)v; // Final byte without continuation bit (v < 0x80 here) |
190 |

191 | 0 | ctx->p = p; |
192 | 0 | } |
193 | | |
194 | | /* |
195 | | * Macros for generating DWARF structures |
196 | | * |
197 | | * Every macro expects a local `uint8_t *p` write cursor in scope and advances it. |
198 | | * DWRF_UV, DWRF_SV and DWRF_STR additionally expect a local `ctx` and sync p through ctx->p. |
199 | | */ |
200 | | #define DWRF_U8(x) (*p++ = (x)) // Write unsigned 8-bit |
201 | | #define DWRF_I8(x) (*(int8_t*)p = (x), p++) // Write signed 8-bit |
202 | | #define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) // Write unsigned 16-bit (direct store through a cast of p) |
203 | | #define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) // Write unsigned 32-bit (direct store through a cast of p) |
204 | | #define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) // Write pointer-sized address (direct store) |
205 | | #define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) // Write ULEB128 |
206 | | #define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) // Write SLEB128 |
207 | | #define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) // Write string incl. terminator |
208 | | |
209 | | /* Pad with DWRF_CFA_nop until the absolute address in p is a multiple of s (s: power of two) */ |
210 | | #define DWRF_ALIGNNOP(s) \ |
211 | | while ((uintptr_t)p & ((s)-1)) { \ |
212 | | *p++ = DWRF_CFA_nop; \ |
213 | | } |
214 | | |
215 | | /* Emit one record: reserve a 32-bit length, run stmt, then store the byte count that follows the length field */ |
216 | | #define DWRF_SECTION(name, stmt) \ |
217 | 0 | { \ |
218 | 0 | uint32_t* szp_##name = (uint32_t*)p; \ |
219 | 0 | p += 4; \ |
220 | 0 | stmt; \ |
221 | 0 | *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ |
222 | 0 | } |
223 | | |
224 | | // ============================================================================= |
225 | | // DWARF EH FRAME GENERATION |
226 | | // ============================================================================= |
227 | | |
228 | | static void elf_init_ehframe_perf(ELFObjectContext* ctx); |
229 | | #if defined(PY_HAVE_JIT_GDB_UNWIND) |
230 | | static void elf_init_ehframe_gdb(ELFObjectContext* ctx); |
231 | | #endif |
232 | | /* Emit the .eh_frame flavor selected by absolute_addr: nonzero = GDB (absolute addresses), zero = perf (PC-relative). */ |
233 | 0 | static inline void elf_init_ehframe(ELFObjectContext* ctx, int absolute_addr) { |
234 | 0 | if (absolute_addr) { |
235 | | #if defined(PY_HAVE_JIT_GDB_UNWIND) |
236 | | elf_init_ehframe_gdb(ctx); |
237 | | #else |
238 | 0 | Py_UNREACHABLE(); /* the absolute flavor is only compiled with PY_HAVE_JIT_GDB_UNWIND */ |
239 | 0 | #endif |
240 | 0 | } |
241 | 0 | else { |
242 | 0 | elf_init_ehframe_perf(ctx); |
243 | 0 | } |
244 | 0 | } |
245 | | /* Return the number of bytes elf_init_ehframe emits for the given flavor, measured by a dry run into a scratch buffer. */ |
246 | | size_t |
247 | | _PyJitUnwind_EhFrameSize(int absolute_addr) |
248 | 0 | { |
249 | | /* Small bounded scratch; pointer-aligned because DWRF_ALIGNNOP pads by absolute address, so the size must not depend on stack layout. */ |
250 | 0 | _Alignas(uintptr_t) uint8_t scratch[512]; |
251 | 0 | _Static_assert(sizeof(scratch) >= 256, |
252 | 0 | "scratch buffer may be too small for elf_init_ehframe"); |
253 | 0 | ELFObjectContext ctx; |
254 | 0 | ctx.code_size = 1; |
255 | 0 | ctx.code_addr = 0; |
256 | 0 | ctx.startp = ctx.p = scratch; |
257 | 0 | ctx.fde_p = NULL; |
258 | | /* Generate once into scratch to learn the required size (dummy code_addr/code_size; fields are fixed-width). */ |
259 | 0 | elf_init_ehframe(&ctx, absolute_addr); |
260 | 0 | ptrdiff_t size = ctx.p - ctx.startp; |
261 | 0 | assert(size <= (ptrdiff_t)sizeof(scratch)); |
262 | 0 | return (size_t)size; |
263 | 0 | } |
264 | | /* Write the .eh_frame for [code_addr, code_addr + code_size) into buffer; returns bytes written, or 0 on bad arguments or a too-small buffer. */ |
265 | | size_t |
266 | | _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size, |
267 | | const void *code_addr, size_t code_size, |
268 | | int absolute_addr) |
269 | 0 | { |
270 | 0 | if (buffer == NULL || code_addr == NULL || code_size == 0) { |
271 | 0 | return 0; |
272 | 0 | } |
273 | | /* Generate the frame twice: once (inside _PyJitUnwind_EhFrameSize) to size-check, once to write. */ |
274 | 0 | size_t required = _PyJitUnwind_EhFrameSize(absolute_addr); |
275 | 0 | if (required == 0 || required > buffer_size) { |
276 | 0 | return 0; |
277 | 0 | } |
278 | 0 | ELFObjectContext ctx; |
279 | 0 | ctx.code_size = code_size; |
280 | 0 | ctx.code_addr = (uintptr_t)code_addr; |
281 | 0 | ctx.startp = ctx.p = buffer; |
282 | 0 | ctx.fde_p = NULL; |
283 | 0 | elf_init_ehframe(&ctx, absolute_addr); |
284 | 0 | size_t written = (size_t)(ctx.p - ctx.startp); |
285 | | /* Size is independent of code_addr/code_size (fixed-width fields); DWRF_ALIGNNOP padding does depend on buffer's alignment modulo sizeof(uintptr_t). */ |
286 | 0 | assert(written == required); |
287 | 0 | return written; |
288 | 0 | } |
289 | | |
290 | | /* |
291 | | * Generate a minimal .eh_frame for a single JIT code region. |
292 | | * |
293 | | * The .eh_frame section contains Call Frame Information (CFI) that describes |
294 | | * how to unwind the stack at any point in the code. This is essential for |
295 | | * unwinding through JIT-generated code. |
296 | | * |
297 | | * The generated data contains: |
298 | | * 1. A CIE (Common Information Entry) describing the calling convention. |
299 | | * 2. An FDE (Frame Description Entry) describing how to unwind the JIT frame. |
300 | | * |
301 | | * Two flavors are emitted, dispatched on the absolute_addr flag: |
302 | | * |
303 | | * - absolute_addr == 0 (elf_init_ehframe_perf): PC-relative FDE address |
304 | | * encoding for perf's synthesized DSO layout. The CIE describes the |
305 | | * trampoline's entry state and the FDE walks through the prologue and |
306 | | * epilogue with advance_loc instructions. This matches the pre-existing |
307 | | * perf_jit_trampoline behavior byte-for-byte. |
308 | | * |
309 | | * - absolute_addr != 0 (elf_init_ehframe_gdb): absolute FDE address |
310 | | * encoding for the GDB JIT in-memory ELF. The CIE describes the |
311 | | * steady-state frame layout (CFA = %rbp+16 / x29+16, with saved fp and |
312 | | * return-address column at fixed offsets) and the FDE emits no further |
313 | | * CFI. The same rule applies at every PC in the registered region, |
314 | | * which is correct for executor stencils (they pin the frame pointer |
315 | | * across the region). This is the GDB-side fix; see elf_init_ehframe_gdb |
316 | | * for details. |
317 | | */ |
318 | 0 | static void elf_init_ehframe_perf(ELFObjectContext* ctx) { |
319 | 0 | int fde_ptr_enc = DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4; |
320 | 0 | uint8_t* p = ctx->p; |
321 | 0 | uint8_t* framep = p; // Remember start of frame data |
322 | | |
323 | | /* |
324 | | * DWARF Unwind Table for Trampoline Function |
325 | | * |
326 | | * This section defines DWARF Call Frame Information (CFI) using encoded macros |
327 | | * like `DWRF_U8`, `DWRF_UV`, and `DWRF_SECTION` to describe how the trampoline function |
328 | | * preserves and restores registers. This is used by profiling tools (e.g., `perf`) |
329 | | * and debuggers for stack unwinding in JIT-compiled code. |
330 | | * |
331 | | * ------------------------------------------------- |
332 | | * TO REGENERATE THIS TABLE FROM GCC OBJECTS: |
333 | | * ------------------------------------------------- |
334 | | * |
335 | | * 1. Create a trampoline source file (e.g., `trampoline.c`): |
336 | | * |
337 | | * #include <Python.h> |
338 | | * typedef PyObject* (*py_evaluator)(void*, void*, int); |
339 | | * PyObject* trampoline(void *ts, void *f, int throwflag, py_evaluator evaluator) { |
340 | | * return evaluator(ts, f, throwflag); |
341 | | * } |
342 | | * |
343 | | * 2. Compile to an object file with frame pointer preservation: |
344 | | * |
345 | | * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c |
346 | | * |
347 | | * 3. Extract DWARF unwind info from the object file: |
348 | | * |
349 | | * readelf -w trampoline.o |
350 | | * |
351 | | * Example output from `.eh_frame`: |
352 | | * |
353 | | * 00000000 CIE |
354 | | * Version: 1 |
355 | | * Augmentation: "zR" |
356 | | * Code alignment factor: 4 |
357 | | * Data alignment factor: -8 |
358 | | * Return address column: 30 |
359 | | * DW_CFA_def_cfa: r31 (sp) ofs 0 |
360 | | * |
361 | | * 00000014 FDE cie=00000000 pc=0..14 |
362 | | * DW_CFA_advance_loc: 4 |
363 | | * DW_CFA_def_cfa_offset: 16 |
364 | | * DW_CFA_offset: r29 at cfa-16 |
365 | | * DW_CFA_offset: r30 at cfa-8 |
366 | | * DW_CFA_advance_loc: 12 |
367 | | * DW_CFA_restore: r30 |
368 | | * DW_CFA_restore: r29 |
369 | | * DW_CFA_def_cfa_offset: 0 |
370 | | * |
371 | | * -- These values can be verified by comparing with `readelf -w` or `llvm-dwarfdump --eh-frame`. |
372 | | * |
373 | | * ---------------------------------- |
374 | | * HOW TO TRANSLATE TO DWRF_* MACROS: |
375 | | * ---------------------------------- |
376 | | * |
377 | | * After compiling your trampoline with: |
378 | | * |
379 | | * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c |
380 | | * |
381 | | * run: |
382 | | * |
383 | | * readelf -w trampoline.o |
384 | | * |
385 | | * to inspect the generated `.eh_frame` data. You will see two main components: |
386 | | * |
387 | | * 1. A CIE (Common Information Entry): shared configuration used by all FDEs. |
388 | | * 2. An FDE (Frame Description Entry): function-specific unwind instructions. |
389 | | * |
390 | | * --------------------- |
391 | | * Translating the CIE: |
392 | | * --------------------- |
393 | | * From `readelf -w`, you might see: |
394 | | * |
395 | | * 00000000 0000000000000010 00000000 CIE |
396 | | * Version: 1 |
397 | | * Augmentation: "zR" |
398 | | * Code alignment factor: 4 |
399 | | * Data alignment factor: -8 |
400 | | * Return address column: 30 |
401 | | * Augmentation data: 1b |
402 | | * DW_CFA_def_cfa: r31 (sp) ofs 0 |
403 | | * |
404 | | * Map this to: |
405 | | * |
406 | | * DWRF_SECTION(CIE, |
407 | | * DWRF_U32(0); // CIE ID (always 0 for CIEs) |
408 | | * DWRF_U8(DWRF_CIE_VERSION); // Version: 1 |
409 | | * DWRF_STR("zR"); // Augmentation string "zR" |
410 | | * DWRF_UV(4); // Code alignment factor = 4 |
411 | | * DWRF_SV(-8); // Data alignment factor = -8 |
412 | | * DWRF_U8(DWRF_REG_RA); // Return address register (e.g., x30 = 30) |
413 | | * DWRF_UV(1); // Augmentation data length = 1 |
414 | | * DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // Encoding for FDE pointers |
415 | | * |
416 | | * DWRF_U8(DWRF_CFA_def_cfa); // DW_CFA_def_cfa |
417 | | * DWRF_UV(DWRF_REG_SP); // Register: SP (r31) |
418 | | * DWRF_UV(0); // Offset = 0 |
419 | | * |
420 | | * DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer size boundary |
421 | | * ) |
422 | | * |
423 | | * Notes: |
424 | | * - Use `DWRF_UV` for unsigned LEB128, `DWRF_SV` for signed LEB128. |
425 | | * - `DWRF_REG_RA` and `DWRF_REG_SP` are architecture-defined constants. |
426 | | * |
427 | | * --------------------- |
428 | | * Translating the FDE: |
429 | | * --------------------- |
430 | | * From `readelf -w`: |
431 | | * |
432 | | * 00000014 0000000000000020 00000018 FDE cie=00000000 pc=0000000000000000..0000000000000014 |
433 | | * DW_CFA_advance_loc: 4 |
434 | | * DW_CFA_def_cfa_offset: 16 |
435 | | * DW_CFA_offset: r29 at cfa-16 |
436 | | * DW_CFA_offset: r30 at cfa-8 |
437 | | * DW_CFA_advance_loc: 12 |
438 | | * DW_CFA_restore: r30 |
439 | | * DW_CFA_restore: r29 |
440 | | * DW_CFA_def_cfa_offset: 0 |
441 | | * |
442 | | * Map the FDE header and instructions to: |
443 | | * |
444 | | * DWRF_SECTION(FDE, |
445 | | * DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here) |
446 | | * DWRF_U32(pc_relative_offset); // PC-relative location of the code (calculated dynamically) |
447 | | * DWRF_U32(ctx->code_size); // Code range covered by this FDE |
448 | | * DWRF_U8(0); // Augmentation data length (none) |
449 | | * |
450 | | * DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 unit (1 * 4 = 4 bytes) |
451 | | * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 |
452 | | * DWRF_UV(16); |
453 | | * |
454 | | * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Save x29 (frame pointer) |
455 | | * DWRF_UV(2); // At offset 2 * 8 = 16 bytes |
456 | | * |
457 | | * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Save x30 (return address) |
458 | | * DWRF_UV(1); // At offset 1 * 8 = 8 bytes |
459 | | * |
460 | | * DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance location by 3 units (3 * 4 = 12 bytes) |
461 | | * |
462 | | * DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 (single byte, no operand) |
463 | | * DWRF_U8(DWRF_CFA_restore | DWRF_REG_FP); // Restore x29 (single byte, no operand) |
464 | | * |
465 | | * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP |
466 | | * DWRF_UV(0); |
467 | | * ) |
468 | | * |
469 | | * To regenerate: |
470 | | * 1. Get the `code alignment factor`, `data alignment factor`, and `RA column` from the CIE. |
471 | | * 2. Note the range of the function from the FDE's `pc=...` line and map it to the JIT code as |
472 | | * the code is in a different address space every time. |
473 | | * 3. For each `DW_CFA_*` entry, use the corresponding `DWRF_*` macro: |
474 | | * - `DW_CFA_def_cfa_offset` → DWRF_U8(DWRF_CFA_def_cfa_offset), DWRF_UV(value) |
475 | | * - `DW_CFA_offset: rX` → DWRF_U8(DWRF_CFA_offset | reg), DWRF_UV(offset / |data_alignment_factor|) |
476 | | * - `DW_CFA_restore: rX` → DWRF_U8(DWRF_CFA_restore | reg) // single byte, no DWRF_UV() operand follows |
477 | | * - `DW_CFA_advance_loc: N` → DWRF_U8(DWRF_CFA_advance_loc | (N / code_alignment_factor)) |
478 | | * 4. Use `DWRF_REG_FP`, `DWRF_REG_RA`, etc., for register numbers. |
479 | | * 5. Use `sizeof(uintptr_t)` (typically 8) for pointer size calculations and alignment. |
480 | | */ |
481 | | |
482 | | /* |
483 | | * Emit DWARF EH CIE (Common Information Entry) |
484 | | * |
485 | | * The CIE describes the calling conventions and basic unwinding rules |
486 | | * shared by the FDE that follows it in this frame. |
487 | | */ |
488 | 0 | DWRF_SECTION(CIE, |
489 | 0 | DWRF_U32(0); // CIE ID (0 indicates this is a CIE) |
490 | 0 | DWRF_U8(DWRF_CIE_VERSION); // CIE version (1) |
491 | 0 | DWRF_STR("zR"); // Augmentation string ('z' = augmentation data present, 'R' = FDE pointer encoding byte) |
492 | 0 | #ifdef __x86_64__ |
493 | 0 | DWRF_UV(1); // Code alignment factor (x86_64: 1 byte) |
494 | | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) |
495 | | DWRF_UV(4); // Code alignment factor (AArch64: 4 bytes per instruction) |
496 | | #endif |
497 | 0 | DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative pointer size) |
498 | 0 | DWRF_U8(DWRF_REG_RA); // Return address register number |
499 | 0 | DWRF_UV(1); // Augmentation data length (the one encoding byte below) |
500 | 0 | DWRF_U8(fde_ptr_enc); // FDE pointer encoding (pcrel | sdata4) |
501 | | |
502 | | /* Initial CFI instructions - describe the state at function entry */ |
503 | 0 | #ifdef __x86_64__ |
504 | | /* x86_64 initial CFI state */ |
505 | 0 | DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Canonical Frame Address) |
506 | 0 | DWRF_UV(DWRF_REG_SP); // CFA = SP register |
507 | 0 | DWRF_UV(sizeof(uintptr_t)); // CFA = SP + pointer_size |
508 | 0 | DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); // Return address is saved |
509 | 0 | DWRF_UV(1); // At factored offset 1 (1 * -8 = CFA-8) |
510 | | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) |
511 | | /* AArch64 initial CFI state */ |
512 | | DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Canonical Frame Address) |
513 | | DWRF_UV(DWRF_REG_SP); // CFA = SP register |
514 | | DWRF_UV(0); // CFA = SP + 0 (AArch64 starts with offset 0) |
515 | | // No initial register saves in AArch64 CIE |
516 | | #endif |
517 | 0 | DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary |
518 | 0 | ) |
519 | | |
520 | | /* |
521 | | * Emit DWARF EH FDE (Frame Description Entry) |
522 | | * |
523 | | * The FDE describes unwinding information specific to this function. |
524 | | * It references the CIE and provides function-specific CFI instructions. |
525 | | * |
526 | | * The PC-relative offset is calculated after the entire EH frame is built |
527 | | * to ensure accurate positioning relative to the synthesized DSO layout. |
528 | | */ |
529 | 0 | DWRF_SECTION(FDE, |
530 | 0 | DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference) |
531 | | /* |
532 | | * In perf jitdump mode the FDE PC field is encoded PC-relative and |
533 | | * points back to code_start. Record where that field lives so we can |
534 | | * patch in the final offset after the rest of the synthetic DSO |
535 | | * layout is known. |
536 | | */ |
537 | 0 | ctx->fde_p = p; // Remember where PC offset field is located for later calculation |
538 | 0 | DWRF_U32(0); // Placeholder for PC-relative offset (patched at the end of this function) |
539 | 0 | DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length) |
540 | 0 | DWRF_U8(0); // Augmentation data length (none) |
541 | | |
542 | | /* |
543 | | * Architecture-specific CFI instructions |
544 | | * |
545 | | * These instructions describe how registers are saved and restored |
546 | | * as the trampoline's prologue and epilogue execute; each advance_loc |
547 | | * moves to the PC at which the following rules take effect. |
548 | | */ |
549 | 0 | #ifdef __x86_64__ |
550 | | /* x86_64 calling convention unwinding rules */ |
551 | | # if defined(__CET__) && (__CET__ & 1) |
552 | | DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance past endbr64 (4 bytes) |
553 | | # endif |
554 | 0 | DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte) |
555 | 0 | DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16 |
556 | 0 | DWRF_UV(16); // New offset: SP + 16 |
557 | 0 | DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16 |
558 | 0 | DWRF_UV(2); // Offset factor: 2 * 8 = 16 bytes |
559 | 0 | DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes) |
560 | 0 | DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6 |
561 | 0 | DWRF_UV(DWRF_REG_BP); // Use base pointer register |
562 | 0 | DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3 |
563 | 0 | DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8 |
564 | 0 | DWRF_UV(DWRF_REG_SP); // Use stack pointer register |
565 | 0 | DWRF_UV(8); // New offset: SP + 8 |
566 | | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) |
567 | | /* AArch64 calling convention unwinding rules */ |
568 | | #if defined(__ARM_FEATURE_PAC_DEFAULT) || \ |
569 | | (defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1) |
570 | | DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past the leading PAC/BTI instruction (1 unit = 4 bytes) |
571 | | #endif |
572 | | #if defined(__ARM_FEATURE_PAC_DEFAULT) |
573 | | DWRF_U8(DWRF_CFA_AARCH64_negate_ra_state); // Saved LR is PAC-signed from here |
574 | | #endif |
575 | | DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance by 1 instruction (4 bytes) |
576 | | DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 |
577 | | DWRF_UV(16); // Stack pointer moved by 16 bytes |
578 | | DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // x29 (frame pointer) saved |
579 | | DWRF_UV(2); // At CFA-16 (2 * 8 = 16 bytes from CFA) |
580 | | DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // x30 (link register) saved |
581 | | DWRF_UV(1); // At CFA-8 (1 * 8 = 8 bytes from CFA) |
582 | | DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (12 bytes) |
583 | | #if defined(__ARM_FEATURE_PAC_DEFAULT) |
584 | | DWRF_U8(DWRF_CFA_AARCH64_negate_ra_state); // LR is authenticated, no longer PAC-signed |
585 | | #endif |
586 | | DWRF_U8(DWRF_CFA_def_cfa_register); // CFA register = FP (x29); offset stays 16 |
587 | | DWRF_UV(DWRF_REG_FP); |
588 | | DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 - NO DWRF_UV() after this! |
589 | | DWRF_U8(DWRF_CFA_restore | DWRF_REG_FP); // Restore x29 - NO DWRF_UV() after this! |
590 | | DWRF_U8(DWRF_CFA_def_cfa); // CFA = SP + 0 (stack restored) |
591 | | DWRF_UV(DWRF_REG_SP); |
592 | | DWRF_UV(0); |
593 | | |
594 | | #else |
595 | | # error "Unsupported target architecture" |
596 | | #endif |
597 |

598 | 0 | DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary |
599 | 0 | ) |
600 |

601 | 0 | ctx->p = p; // Update context pointer to end of generated data |
602 | | |
603 | | /* Calculate and update the PC-relative offset in the FDE |
604 | | * |
605 | | * When perf processes the jitdump, it creates a synthesized DSO with this layout: |
606 | | * |
607 | | * Synthesized DSO Memory Layout: |
608 | | * ┌─────────────────────────────────────────────────────────────┐ < code_start |
609 | | * │ Code Section │ |
610 | | * │ (round_up(code_size, 8) bytes) │ |
611 | | * ├─────────────────────────────────────────────────────────────┤ < start of EH frame data |
612 | | * │ EH Frame Data │ |
613 | | * │ ┌─────────────────────────────────────────────────────┐ │ |
614 | | * │ │ CIE data │ │ |
615 | | * │ └─────────────────────────────────────────────────────┘ │ |
616 | | * │ ┌─────────────────────────────────────────────────────┐ │ |
617 | | * │ │ FDE Header: │ │ |
618 | | * │ │ - CIE offset (4 bytes) │ │ |
619 | | * │ │ - PC offset (4 bytes) <─ fde_offset_in_frame ─────┼────┼─> points to code_start |
620 | | * │ │ - address range (4 bytes) │ │ (this specific field) |
621 | | * │ │ CFI Instructions... │ │ |
622 | | * │ └─────────────────────────────────────────────────────┘ │ |
623 | | * ├─────────────────────────────────────────────────────────────┤ < reference_point |
624 | | * │ EhFrameHeader │ |
625 | | * │ (navigation metadata) │ |
626 | | * └─────────────────────────────────────────────────────────────┘ |
627 | | * |
628 | | * The PC offset field in the FDE must contain the distance from itself to code_start: |
629 | | * |
630 | | * distance = code_start - fde_pc_field |
631 | | * |
632 | | * Where: |
633 | | * fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame |
634 | | * code_start_location = reference_point - eh_frame_size - round_up(code_size, 8) |
635 | | * |
636 | | * Therefore: |
637 | | * distance = code_start_location - fde_pc_field_location |
638 | | * = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame) |
639 | | * = -rounded_code_size - fde_offset_in_frame |
640 | | * = -(round_up(code_size, 8) + fde_offset_in_frame) |
641 | | * |
642 | | * Note: fde_offset_in_frame is the offset from EH frame start (framep) to the PC offset field (ctx->fde_p). |
643 | | * |
644 | | */ |
645 | 0 | int32_t rounded_code_size = |
646 | 0 | (int32_t)_Py_SIZE_ROUND_UP(ctx->code_size, 8); |
647 | 0 | int32_t fde_offset_in_frame = (int32_t)(ctx->fde_p - framep); |
648 | 0 | *(int32_t *)ctx->fde_p = -(rounded_code_size + fde_offset_in_frame); |
649 | 0 | } |
650 | | |
651 | | /* |
652 | | * Build .eh_frame data for the GDB JIT interface. |
653 | | * |
654 | | * The executor runs inside the frame established by _PyJIT_Entry, but the |
655 | | * synthetic executor FDE collapses that state into a single logical JIT frame |
656 | | * that unwinds directly into _PyEval_*. Executor stencils never touch the |
657 | | * frame pointer - enforced by Tools/jit/_optimizers.py _validate() and |
658 | | * -mframe-pointer=reserved - so the steady-state rule is valid at every PC |
659 | | * and the FDE body is empty. Tools/jit/_targets.py derives the initial CFI |
660 | | * rules from the row active at the executor call in the compiled shim object. |
661 | | */ |
662 | | #if defined(PY_HAVE_JIT_GDB_UNWIND) |
663 | | static void elf_init_ehframe_gdb(ELFObjectContext* ctx) { |
664 | | int fde_ptr_enc = DWRF_EH_PE_absptr; |
665 | | uint8_t* p = ctx->p; |
666 | | uint8_t* framep = p; |
667 | | |
668 | | DWRF_SECTION(CIE, |
669 | | DWRF_U32(0); // CIE ID (0 indicates this is a CIE) |
670 | | DWRF_U8(DWRF_CIE_VERSION); |
671 | | DWRF_STR("zR"); // 'z': augmentation data length follows; 'R': FDE pointer encoding byte follows |
672 | | DWRF_UV(JIT_UNWIND_CODE_ALIGNMENT_FACTOR); |
673 | | DWRF_SV(JIT_UNWIND_DATA_ALIGNMENT_FACTOR); |
674 | | DWRF_U8(JIT_UNWIND_RA_REG); |
675 | | DWRF_UV(1); // Augmentation data length (the one encoding byte below) |
676 | | DWRF_U8(fde_ptr_enc); // FDE pointer encoding (absolute pointers) |
677 | | |
678 | | /* Executor steady-state rule (our invariant, not the compiler's). */ |
679 | | DWRF_U8(DWRF_CFA_def_cfa); |
680 | | DWRF_UV(JIT_UNWIND_CFA_REG); |
681 | | DWRF_UV(JIT_UNWIND_CFA_OFFSET); |
682 | | DWRF_U8(DWRF_CFA_offset | JIT_UNWIND_FP_REG); |
683 | | DWRF_UV(JIT_UNWIND_FP_OFFSET); |
684 | | DWRF_U8(DWRF_CFA_offset | JIT_UNWIND_RA_REG); |
685 | | DWRF_UV(JIT_UNWIND_RA_OFFSET); |
686 | | DWRF_ALIGNNOP(sizeof(uintptr_t)); |
687 | | ) |
688 | | |
689 | | DWRF_SECTION(FDE, |
690 | | DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference) |
691 | | DWRF_ADDR(ctx->code_addr); // Absolute code start (pointer-sized) |
692 | | DWRF_ADDR((uintptr_t)ctx->code_size); // Code range covered (pointer-sized, like the start address) |
693 | | DWRF_U8(0); // Augmentation data length (none) |
694 | | DWRF_ALIGNNOP(sizeof(uintptr_t)); |
695 | | ) |
696 | | |
697 | | ctx->p = p; |
698 | | } |
699 | | #endif |
700 | | |
701 | | #if defined(PY_HAVE_JIT_GDB_UNWIND) |
702 | | enum { /* Values stored in jit_descriptor.action_flag */ |
703 | | JIT_NOACTION = 0, /* initial value of __jit_debug_descriptor.action_flag */ |
704 | | JIT_REGISTER_FN = 1, /* NOTE(review): presumably "relevant_entry was added" - confirm against the GDB manual */ |
705 | | JIT_UNREGISTER_FN = 2, /* NOTE(review): presumably "relevant_entry is being removed" - confirm against the GDB manual */ |
706 | | }; |
707 | | /* Doubly linked list node describing one symbol file; NOTE(review): layout presumably fixed by GDB's JIT interface - do not reorder without checking. */ |
708 | | struct jit_code_entry { |
709 | | struct jit_code_entry *next; |
710 | | struct jit_code_entry *prev; |
711 | | const char *symfile_addr; /* start of the symbol file bytes */ |
712 | | uint64_t symfile_size; /* length of the symbol file in bytes */ |
713 | | const void *code_addr; /* NOTE(review): presumably the JIT code this entry describes - confirm in gdb_jit_register_code */ |
714 | | }; |
715 | | |
/* Root record of the debugger interface; instantiated as __jit_debug_descriptor. */
struct jit_descriptor {
    uint32_t version;                       // Interface version (the instance in this file uses 1)
    uint32_t action_flag;                   // JIT_NOACTION, JIT_REGISTER_FN or JIT_UNREGISTER_FN
    struct jit_code_entry *relevant_entry;  // Entry the pending action refers to, otherwise NULL
    struct jit_code_entry *first_entry;     // Head of the list of registered entries
};
722 | | |
723 | | PyMutex _Py_jit_debug_mutex = {0}; |
724 | | |
725 | | Py_EXPORTED_SYMBOL volatile struct jit_descriptor __jit_debug_descriptor = { |
726 | | 1, JIT_NOACTION, NULL, NULL |
727 | | }; |
728 | | |
/*
 * Notification hook of the debugger JIT interface.
 *
 * Code in this file first fills in __jit_debug_descriptor (relevant_entry and
 * action_flag) and then calls this function. The body intentionally does no
 * real work; it is exported and marked noinline so that the call remains an
 * actual call to a named function.
 */
Py_EXPORTED_SYMBOL void __attribute__((noinline))
__jit_debug_register_code(void)
{
    /* Keep this call visible to debuggers and not optimized away. */
    (void)__jit_debug_descriptor.action_flag;   // Read of a volatile object: cannot be elided
#if defined(__GNUC__) || defined(__clang__)
    /* Empty asm with a "memory" clobber: a compiler-level barrier. */
    __asm__ __volatile__("" ::: "memory");
#endif
}
738 | | |
/*
 * Return the ELF e_machine constant for the architecture being compiled for.
 * A result of 0 means the target is not supported and registration must be
 * skipped by the caller.
 */
static uint16_t
gdb_jit_machine_id(void)
{
    uint16_t e_machine = 0;     /* Default: unsupported target. */

#if defined(__x86_64__) || defined(_M_X64)
    e_machine = EM_X86_64;
#elif defined(__aarch64__) && !defined(__ILP32__)
    e_machine = EM_AARCH64;
#endif

    return e_machine;
}
751 | | |
752 | | static struct jit_code_entry * |
753 | | gdb_jit_register_code( |
754 | | const void *code_addr, |
755 | | size_t code_size, |
756 | | const char *symname, |
757 | | const uint8_t *eh_frame, |
758 | | size_t eh_frame_size |
759 | | ) |
760 | | { |
761 | | /* |
762 | | * Build a minimal in-memory ELF for GDB's JIT interface and link it into |
763 | | * __jit_debug_descriptor so debuggers can resolve JIT code. |
764 | | */ |
765 | | if (code_addr == NULL || code_size == 0 || symname == NULL) { |
766 | | return NULL; |
767 | | } |
768 | | |
769 | | const uint16_t machine = gdb_jit_machine_id(); |
770 | | if (machine == 0) { |
771 | | return NULL; |
772 | | } |
773 | | |
774 | | enum { |
775 | | SH_NULL = 0, |
776 | | SH_TEXT, |
777 | | SH_EH_FRAME, |
778 | | SH_SHSTRTAB, |
779 | | SH_STRTAB, |
780 | | SH_SYMTAB, |
781 | | SH_NUM, |
782 | | }; |
783 | | static const char shstrtab[] = |
784 | | "\0.text\0.eh_frame\0.shstrtab\0.strtab\0.symtab"; |
785 | | _Static_assert(sizeof(shstrtab) == |
786 | | 1 + sizeof(".text") + sizeof(".eh_frame") + |
787 | | sizeof(".shstrtab") + sizeof(".strtab") + sizeof(".symtab"), |
788 | | "shstrtab size mismatch"); |
789 | | const size_t shstrtab_size = sizeof(shstrtab); |
790 | | const size_t sh_text = 1; |
791 | | const size_t sh_eh_frame = sh_text + sizeof(".text"); |
792 | | const size_t sh_shstrtab = sh_eh_frame + sizeof(".eh_frame"); |
793 | | const size_t sh_strtab = sh_shstrtab + sizeof(".shstrtab"); |
794 | | const size_t sh_symtab = sh_strtab + sizeof(".strtab"); |
795 | | const size_t text_size = code_size; |
796 | | const size_t text_padded = _Py_SIZE_ROUND_UP(text_size, 8); |
797 | | const size_t strtab_size = 1 + strlen(symname) + 1; |
798 | | const size_t symtab_size = 3 * sizeof(Elf64_Sym); |
799 | | |
800 | | size_t offset = sizeof(Elf64_Ehdr); |
801 | | offset = _Py_SIZE_ROUND_UP(offset, 16); |
802 | | const size_t text_off = offset; |
803 | | const size_t eh_off = text_off + text_padded; |
804 | | offset = eh_off + eh_frame_size; |
805 | | const size_t shstr_off = offset; |
806 | | offset += shstrtab_size; |
807 | | const size_t str_off = offset; |
808 | | offset += strtab_size; |
809 | | /* Elf64_Sym requires 8-byte alignment for st_value/st_size. */ |
810 | | offset = _Py_SIZE_ROUND_UP(offset, 8); |
811 | | const size_t sym_off = offset; |
812 | | offset += symtab_size; |
813 | | offset = _Py_SIZE_ROUND_UP(offset, sizeof(Elf64_Shdr)); |
814 | | const size_t sh_off = offset; |
815 | | |
816 | | const size_t shnum = SH_NUM; |
817 | | const size_t total_size = sh_off + shnum * sizeof(Elf64_Shdr); |
818 | | uint8_t *buf = (uint8_t *)PyMem_RawMalloc(total_size); |
819 | | if (buf == NULL) { |
820 | | return NULL; |
821 | | } |
822 | | memset(buf, 0, total_size); |
823 | | |
824 | | Elf64_Ehdr *ehdr = (Elf64_Ehdr *)buf; |
825 | | memcpy(ehdr->e_ident, ELFMAG, SELFMAG); |
826 | | ehdr->e_ident[EI_CLASS] = ELFCLASS64; |
827 | | ehdr->e_ident[EI_DATA] = ELFDATA2LSB; |
828 | | ehdr->e_ident[EI_VERSION] = EV_CURRENT; |
829 | | ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE; |
830 | | ehdr->e_type = ET_DYN; |
831 | | ehdr->e_machine = machine; |
832 | | ehdr->e_version = EV_CURRENT; |
833 | | ehdr->e_entry = 0; |
834 | | ehdr->e_phoff = 0; |
835 | | ehdr->e_shoff = sh_off; |
836 | | ehdr->e_ehsize = sizeof(Elf64_Ehdr); |
837 | | ehdr->e_shentsize = sizeof(Elf64_Shdr); |
838 | | ehdr->e_shnum = shnum; |
839 | | ehdr->e_shstrndx = SH_SHSTRTAB; |
840 | | |
841 | | memcpy(buf + text_off, code_addr, text_size); |
842 | | memcpy(buf + eh_off, eh_frame, eh_frame_size); |
843 | | |
844 | | char *shstr = (char *)(buf + shstr_off); |
845 | | memcpy(shstr, shstrtab, shstrtab_size); |
846 | | |
847 | | char *strtab = (char *)(buf + str_off); |
848 | | strtab[0] = '\0'; |
849 | | memcpy(strtab + 1, symname, strlen(symname)); |
850 | | strtab[strtab_size - 1] = '\0'; |
851 | | |
852 | | Elf64_Sym *syms = (Elf64_Sym *)(buf + sym_off); |
853 | | memset(syms, 0, symtab_size); |
854 | | /* Section symbol for .text (local) */ |
855 | | syms[1].st_info = ELF64_ST_INFO(STB_LOCAL, STT_SECTION); |
856 | | syms[1].st_shndx = 1; |
857 | | /* Function symbol */ |
858 | | syms[2].st_name = 1; |
859 | | syms[2].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC); |
860 | | syms[2].st_other = STV_DEFAULT; |
861 | | syms[2].st_shndx = 1; |
862 | | /* For ET_DYN/ET_EXEC, st_value is the absolute virtual address. */ |
863 | | syms[2].st_value = (Elf64_Addr)(uintptr_t)code_addr; |
864 | | syms[2].st_size = code_size; |
865 | | |
866 | | Elf64_Shdr *shdrs = (Elf64_Shdr *)(buf + sh_off); |
867 | | memset(shdrs, 0, shnum * sizeof(Elf64_Shdr)); |
868 | | |
869 | | shdrs[SH_TEXT].sh_name = sh_text; |
870 | | shdrs[SH_TEXT].sh_type = SHT_PROGBITS; |
871 | | shdrs[SH_TEXT].sh_flags = SHF_ALLOC | SHF_EXECINSTR; |
872 | | shdrs[SH_TEXT].sh_addr = (Elf64_Addr)(uintptr_t)code_addr; |
873 | | shdrs[SH_TEXT].sh_offset = text_off; |
874 | | shdrs[SH_TEXT].sh_size = text_size; |
875 | | shdrs[SH_TEXT].sh_addralign = 16; |
876 | | |
877 | | shdrs[SH_EH_FRAME].sh_name = sh_eh_frame; |
878 | | shdrs[SH_EH_FRAME].sh_type = SHT_PROGBITS; |
879 | | shdrs[SH_EH_FRAME].sh_flags = SHF_ALLOC; |
880 | | shdrs[SH_EH_FRAME].sh_addr = |
881 | | (Elf64_Addr)((uintptr_t)code_addr + text_padded); |
882 | | shdrs[SH_EH_FRAME].sh_offset = eh_off; |
883 | | shdrs[SH_EH_FRAME].sh_size = eh_frame_size; |
884 | | shdrs[SH_EH_FRAME].sh_addralign = 8; |
885 | | |
886 | | shdrs[SH_SHSTRTAB].sh_name = sh_shstrtab; |
887 | | shdrs[SH_SHSTRTAB].sh_type = SHT_STRTAB; |
888 | | shdrs[SH_SHSTRTAB].sh_offset = shstr_off; |
889 | | shdrs[SH_SHSTRTAB].sh_size = shstrtab_size; |
890 | | shdrs[SH_SHSTRTAB].sh_addralign = 1; |
891 | | |
892 | | shdrs[SH_STRTAB].sh_name = sh_strtab; |
893 | | shdrs[SH_STRTAB].sh_type = SHT_STRTAB; |
894 | | shdrs[SH_STRTAB].sh_offset = str_off; |
895 | | shdrs[SH_STRTAB].sh_size = strtab_size; |
896 | | shdrs[SH_STRTAB].sh_addralign = 1; |
897 | | |
898 | | shdrs[SH_SYMTAB].sh_name = sh_symtab; |
899 | | shdrs[SH_SYMTAB].sh_type = SHT_SYMTAB; |
900 | | shdrs[SH_SYMTAB].sh_offset = sym_off; |
901 | | shdrs[SH_SYMTAB].sh_size = symtab_size; |
902 | | shdrs[SH_SYMTAB].sh_link = SH_STRTAB; |
903 | | shdrs[SH_SYMTAB].sh_info = 2; |
904 | | shdrs[SH_SYMTAB].sh_addralign = 8; |
905 | | shdrs[SH_SYMTAB].sh_entsize = sizeof(Elf64_Sym); |
906 | | |
907 | | struct jit_code_entry *entry = PyMem_RawMalloc(sizeof(*entry)); |
908 | | if (entry == NULL) { |
909 | | PyMem_RawFree(buf); |
910 | | return NULL; |
911 | | } |
912 | | entry->symfile_addr = (const char *)buf; |
913 | | entry->symfile_size = total_size; |
914 | | entry->code_addr = code_addr; |
915 | | |
916 | | PyMutex_Lock(&_Py_jit_debug_mutex); |
917 | | entry->prev = NULL; |
918 | | entry->next = __jit_debug_descriptor.first_entry; |
919 | | if (entry->next != NULL) { |
920 | | entry->next->prev = entry; |
921 | | } |
922 | | __jit_debug_descriptor.first_entry = entry; |
923 | | __jit_debug_descriptor.relevant_entry = entry; |
924 | | __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; |
925 | | __jit_debug_register_code(); |
926 | | __jit_debug_descriptor.action_flag = JIT_NOACTION; |
927 | | __jit_debug_descriptor.relevant_entry = NULL; |
928 | | PyMutex_Unlock(&_Py_jit_debug_mutex); |
929 | | return entry; |
930 | | } |
931 | | #endif // defined(PY_HAVE_JIT_GDB_UNWIND) |
932 | | |
/*
 * Register a JIT code range with the GDB JIT interface.
 *
 * The symbol is named "py::<entry>:<filename>"; a NULL entry or filename is
 * treated as an empty string. Unwind data is generated into a stack buffer
 * and handed, together with the name, to gdb_jit_register_code().
 *
 * Returns an opaque handle for _PyJitUnwind_GdbUnregisterCode(), or NULL if
 * the name cannot be formatted or allocated, the unwind data cannot be
 * built, or registration fails. Builds without PY_HAVE_JIT_GDB_UNWIND always
 * return NULL.
 */
void *
_PyJitUnwind_GdbRegisterCode(const void *code_addr,
                             size_t code_size,
                             const char *entry,
                             const char *filename)
{
#if defined(PY_HAVE_JIT_GDB_UNWIND)
    /* GDB expects a stable symbol name and absolute addresses in .eh_frame. */
    if (entry == NULL) {
        entry = "";
    }
    if (filename == NULL) {
        filename = "";
    }

    /*
     * snprintf() returns a negative int on an encoding error; adding 1 to
     * that and storing it in a size_t would silently produce a bogus size.
     */
    int name_len = snprintf(NULL, 0, "py::%s:%s", entry, filename);
    if (name_len < 0) {
        return NULL;
    }
    size_t name_size = (size_t)name_len + 1;
    char *name = (char *)PyMem_RawMalloc(name_size);
    if (name == NULL) {
        return NULL;
    }
    snprintf(name, name_size, "py::%s:%s", entry, filename);

    /*
     * NOTE(review): the trailing 1 presumably selects absolute addressing in
     * the generated FDE -- confirm against _PyJitUnwind_BuildEhFrame().
     */
    uint8_t buffer[1024];
    size_t eh_frame_size = _PyJitUnwind_BuildEhFrame(
        buffer, sizeof(buffer), code_addr, code_size, 1);
    if (eh_frame_size == 0) {
        PyMem_RawFree(name);
        return NULL;
    }

    void *handle = gdb_jit_register_code(code_addr, code_size, name,
                                         buffer, eh_frame_size);
    /* The name was copied into the ELF image; the temporary can go. */
    PyMem_RawFree(name);
    return handle;
#else
    (void)code_addr;
    (void)code_size;
    (void)entry;
    (void)filename;
    return NULL;
#endif
}
974 | | |
/*
 * Undo _PyJitUnwind_GdbRegisterCode(): unlink the entry behind `handle` from
 * the descriptor's list, notify the debugger, then free the ELF image and
 * the entry itself. A NULL handle is ignored. Builds without
 * PY_HAVE_JIT_GDB_UNWIND do nothing.
 */
void
_PyJitUnwind_GdbUnregisterCode(void *handle)
{
#if defined(PY_HAVE_JIT_GDB_UNWIND)
    if (handle == NULL) {
        return;
    }
    struct jit_code_entry *node = (struct jit_code_entry *)handle;

    PyMutex_Lock(&_Py_jit_debug_mutex);

    /* Splice the node out of the doubly linked list. */
    struct jit_code_entry *before = node->prev;
    struct jit_code_entry *after = node->next;
    if (before == NULL) {
        __jit_debug_descriptor.first_entry = after;
    }
    else {
        before->next = after;
    }
    if (after != NULL) {
        after->prev = before;
    }

    /* Announce the removal, then return the descriptor to its idle state. */
    __jit_debug_descriptor.relevant_entry = node;
    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
    __jit_debug_register_code();
    __jit_debug_descriptor.action_flag = JIT_NOACTION;
    __jit_debug_descriptor.relevant_entry = NULL;

    PyMutex_Unlock(&_Py_jit_debug_mutex);

    /* The node is no longer reachable from the list; release its storage. */
    PyMem_RawFree((void *)node->symfile_addr);
    PyMem_RawFree(node);
#else
    (void)handle;
#endif
}
1009 | | |
1010 | | #if defined(PY_HAVE_JIT_GNU_BACKTRACE_UNWIND) |
/*
 * Generate .eh_frame data for a JIT code range and hand it to libgcc via
 * __register_frame().
 *
 * Returns the heap buffer holding the registered data, which doubles as the
 * handle for _PyJitUnwind_GnuBacktraceUnregisterCode(), or NULL when the
 * arguments are empty, the frame size is unknown or overflows, allocation
 * fails, or the frame data cannot be built.
 */
void *
_PyJitUnwind_GnuBacktraceRegisterCode(const void *code_addr, size_t code_size)
{
    if (code_addr == NULL || code_size == 0) {
        return NULL;
    }

    const size_t frame_bytes = _PyJitUnwind_EhFrameSize(1);
    if (frame_bytes == 0) {
        return NULL;
    }

    /* Room for the CIE/FDE pair plus one terminator word; reject wrap-around. */
    const size_t alloc_bytes = frame_bytes + sizeof(uint32_t);
    if (alloc_bytes < frame_bytes) {
        return NULL;
    }

    /*
     * __register_frame in libgcc scans a .eh_frame section entry by entry
     * and stops at a zero-length record. The buffer is zero-initialised and
     * one word longer than the generated data, so that terminator is always
     * present after the CIE/FDE pair.
     *
     * See GCC's libgcc/unwind-dw2-fde.c (__register_frame) and
     * libgcc/unwind-dw2-fde.h (last_fde/next_fde):
     *   https://github.com/gcc-mirror/gcc/blob/master/libgcc/unwind-dw2-fde.c
     *   https://github.com/gcc-mirror/gcc/blob/master/libgcc/unwind-dw2-fde.h
     */
    uint8_t *table = PyMem_RawCalloc(1, alloc_bytes);
    if (table == NULL) {
        return NULL;
    }

    const size_t written = _PyJitUnwind_BuildEhFrame(
        table, frame_bytes, code_addr, code_size, 1);
    if (written == 0) {
        PyMem_RawFree(table);
        return NULL;
    }

    __register_frame(table);
    return table;
}
1050 | | |
/*
 * Deregister and free frame data previously returned by
 * _PyJitUnwind_GnuBacktraceRegisterCode(). A NULL handle is a no-op.
 */
void
_PyJitUnwind_GnuBacktraceUnregisterCode(void *handle)
{
    if (handle != NULL) {
        /* libgcc must forget the data before its memory is released. */
        __deregister_frame(handle);
        PyMem_RawFree(handle);
    }
}
1060 | | #endif // defined(PY_HAVE_JIT_GNU_BACKTRACE_UNWIND) |
1061 | | |
1062 | | #endif // JIT unwind support |