Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/gprof2dot.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2450 statements  

1#!/usr/bin/env python3 

2# 

3# Copyright 2008-2023 Jose Fonseca 

4# 

5# This program is free software: you can redistribute it and/or modify it 

6# under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# 

10# This program is distributed in the hope that it will be useful, 

11# but WITHOUT ANY WARRANTY; without even the implied warranty of 

12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

13# GNU Lesser General Public License for more details. 

14# 

15# You should have received a copy of the GNU Lesser General Public License 

16# along with this program. If not, see <http://www.gnu.org/licenses/>. 

17# 

18 

19"""Generate a dot graph from the output of several profilers.""" 

20 

21__author__ = "Jose Fonseca et al" 

22 

23 

24import sys 

25import math 

26import os.path 

27import re 

28import textwrap 

29import optparse 

30import xml.parsers.expat 

31import collections 

32import locale 

33import json 

34import fnmatch 

35import codecs 

36import io 

37import hashlib 

38 

39assert sys.version_info[0] >= 3 

40 

41 

42######################################################################## 

43# Model 

44 

45 

46MULTIPLICATION_SIGN = chr(0xd7) 

47timeFormat = "%.7g" 

48 

49 

50def times(x): 

51 return "%u%s" % (x, MULTIPLICATION_SIGN) 

52 

53def percentage(p): 

54 return "%.02f%%" % (p*100.0,) 

55 

56def fmttime(t): 

57 return timeFormat % t 

58 

59def add(a, b): 

60 return a + b 

61 

62def fail(a, b): 

63 assert False 

64 

65# To enhance readability, labels are rounded to the number of decimal 

66# places corresponding to the tolerance value. 

67def round_difference(difference, tolerance): 

68 n = -math.floor(math.log10(tolerance)) 

69 return round(difference, n) 

70 

71 

72def rescale_difference(x, min_val, max_val): 

73 return (x - min_val) / (max_val - min_val) 

74 

75 

76def min_max_difference(profile1, profile2): 

77 f1_events = [f1[TOTAL_TIME_RATIO] for _, f1 in sorted_iteritems(profile1.functions)] 

78 f2_events = [f2[TOTAL_TIME_RATIO] for _, f2 in sorted_iteritems(profile2.functions)] 

79 differences = [] 

80 for i in range(len(f1_events)): 

81 try: 

82 differences.append(abs(f1_events[i] - f2_events[i]) * 100) 

83 except IndexError: 

84 differences.append(0) 

85 

86 return min(differences), max(differences) 

87 

88 

89tol = 2 ** -23 

90 

91def ratio(numerator, denominator): 

92 try: 

93 ratio = float(numerator)/float(denominator) 

94 except ZeroDivisionError: 

95 # 0/0 is undefined, but 1.0 yields more useful results 

96 return 1.0 

97 if ratio < 0.0: 

98 if ratio < -tol: 

99 sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator)) 

100 return 0.0 

101 if ratio > 1.0: 

102 if ratio > 1.0 + tol: 

103 sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator)) 

104 return 1.0 

105 return ratio 

106 

107 

108class UndefinedEvent(Exception): 

109 """Raised when attempting to get an event which is undefined.""" 

110 

111 def __init__(self, event): 

112 Exception.__init__(self) 

113 self.event = event 

114 

115 def __str__(self): 

116 return 'unspecified event %s' % self.event.name 

117 

118 

119class Event: 

120 """Describe a kind of event, and its basic operations.""" 

121 

122 def __init__(self, name, null, aggregator, formatter = str): 

123 self.name = name 

124 self._null = null 

125 self._aggregator = aggregator 

126 self._formatter = formatter 

127 

128 def __repr__(self): 

129 return self.name 

130 

131 def null(self): 

132 return self._null 

133 

134 def aggregate(self, val1, val2): 

135 """Aggregate two event values.""" 

136 assert val1 is not None 

137 assert val2 is not None 

138 return self._aggregator(val1, val2) 

139 

140 def format(self, val): 

141 """Format an event value.""" 

142 assert val is not None 

143 return self._formatter(val) 

144 

145 

146CALLS = Event("Calls", 0, add, times) 

147SAMPLES = Event("Samples", 0, add, times) 

148SAMPLES2 = Event("Samples", 0, add, times) 

149 

150# Count of samples where a given function was either executing or on the stack. 

151# This is used to calculate the total time ratio according to the 

152# straightforward method described in Mike Dunlavey's answer to 

153# stackoverflow.com/questions/1777556/alternatives-to-gprof, item 4 (the myth 

154# "that recursion is a tricky confusing issue"), last edited 2012-08-30: it's 

155# just the ratio of TOTAL_SAMPLES over the number of samples in the profile. 

156# 

157# Used only when totalMethod == callstacks 

158TOTAL_SAMPLES = Event("Samples", 0, add, times) 

159 

160TIME = Event("Time", 0.0, add, lambda x: '(' + fmttime(x) + ')') 

161TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')') 

162TOTAL_TIME = Event("Total time", 0.0, fail, fmttime) 

163TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage) 

164 

165labels = { 

166 'self-time': TIME, 

167 'self-time-percentage': TIME_RATIO, 

168 'total-time': TOTAL_TIME, 

169 'total-time-percentage': TOTAL_TIME_RATIO, 

170} 

171defaultLabelNames = ['total-time-percentage', 'self-time-percentage'] 

172 

173totalMethod = 'callratios' 

174 

175 

176class Object: 

177 """Base class for all objects in profile which can store events.""" 

178 

179 def __init__(self, events=None): 

180 if events is None: 

181 self.events = {} 

182 else: 

183 self.events = events 

184 

185 def __lt__(self, other): 

186 return id(self) < id(other) 

187 

188 def __contains__(self, event): 

189 return event in self.events 

190 

191 def __getitem__(self, event): 

192 try: 

193 return self.events[event] 

194 except KeyError: 

195 raise UndefinedEvent(event) 

196 

197 def __setitem__(self, event, value): 

198 if value is None: 

199 if event in self.events: 

200 del self.events[event] 

201 else: 

202 self.events[event] = value 

203 

204 

205class Call(Object): 

206 """A call between functions. 

207 

208 There should be at most one call object for every pair of functions. 

209 """ 

210 

211 def __init__(self, callee_id): 

212 Object.__init__(self) 

213 self.callee_id = callee_id 

214 self.ratio = None 

215 self.weight = None 

216 

217 

218class Function(Object): 

219 """A function.""" 

220 

221 def __init__(self, id, name): 

222 Object.__init__(self) 

223 self.id = id 

224 self.name = name 

225 self.module = None 

226 self.process = None 

227 self.calls = {} 

228 self.called = None 

229 self.weight = None 

230 self.cycle = None 

231 self.filename = None 

232 

233 def add_call(self, call): 

234 if call.callee_id in self.calls: 

235 sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id))) 

236 self.calls[call.callee_id] = call 

237 

238 def get_call(self, callee_id): 

239 if not callee_id in self.calls: 

240 call = Call(callee_id) 

241 call[SAMPLES] = 0 

242 call[SAMPLES2] = 0 

243 call[CALLS] = 0 

244 self.calls[callee_id] = call 

245 return self.calls[callee_id] 

246 

247 _parenthesis_re = re.compile(r'\([^()]*\)') 

248 _angles_re = re.compile(r'<[^<>]*>') 

249 _const_re = re.compile(r'\s+const$') 

250 

251 def stripped_name(self): 

252 """Remove extraneous information from C++ demangled function names.""" 

253 

254 name = self.name 

255 

256 # Strip function parameters from name by recursively removing paired parenthesis 

257 while True: 

258 name, n = self._parenthesis_re.subn('', name) 

259 if not n: 

260 break 

261 

262 # Strip const qualifier 

263 name = self._const_re.sub('', name) 

264 

265 # Strip template parameters from name by recursively removing paired angles 

266 while True: 

267 name, n = self._angles_re.subn('', name) 

268 if not n: 

269 break 

270 

271 return name 

272 

273 # TODO: write utility functions 

274 

275 def __repr__(self): 

276 return self.name 

277 

278 def dump(self, sep1=",\n\t", sep2=":=", sep3="\n"): 

279 """ Returns as a string all information available in this Function object 

280 separators sep1:between entries 

281 sep2:between attribute name and value, 

282 sep3: inserted at end 

283 """ 

284 return sep1.join(sep2.join([k,str(v)]) for (k,v) in sorted(self.__dict__.items())) + sep3 

285 

286 

287class Cycle(Object): 

288 """A cycle made from recursive function calls.""" 

289 

290 def __init__(self): 

291 Object.__init__(self) 

292 self.functions = set() 

293 

294 def add_function(self, function): 

295 assert function not in self.functions 

296 self.functions.add(function) 

297 if function.cycle is not None: 

298 for other in function.cycle.functions: 

299 if function not in self.functions: 

300 self.add_function(other) 

301 function.cycle = self 

302 

303 

304class Profile(Object): 

305 """The whole profile.""" 

306 

307 def __init__(self): 

308 Object.__init__(self) 

309 self.functions = {} 

310 self.cycles = [] 

311 

312 def add_function(self, function): 

313 if function.id in self.functions: 

314 sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id))) 

315 self.functions[function.id] = function 

316 

317 def add_cycle(self, cycle): 

318 self.cycles.append(cycle) 

319 

320 def validate(self): 

321 """Validate the edges.""" 

322 

323 for function in self.functions.values(): 

324 for callee_id in list(function.calls.keys()): 

325 assert function.calls[callee_id].callee_id == callee_id 

326 if callee_id not in self.functions: 

327 sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name)) 

328 del function.calls[callee_id] 

329 

330 def find_cycles(self): 

331 """Find cycles using Tarjan's strongly connected components algorithm.""" 

332 

333 # Apply the Tarjan's algorithm successively until all functions are visited 

334 stack = [] 

335 data = {} 

336 order = 0 

337 for function in self.functions.values(): 

338 order = self._tarjan(function, order, stack, data) 

339 cycles = [] 

340 for function in self.functions.values(): 

341 if function.cycle is not None and function.cycle not in cycles: 

342 cycles.append(function.cycle) 

343 self.cycles = cycles 

344 if 0: 

345 for cycle in cycles: 

346 sys.stderr.write("Cycle:\n") 

347 for member in cycle.functions: 

348 sys.stderr.write("\tFunction %s\n" % member.name) 

349 

350 def prune_root(self, roots, depth=-1): 

351 visited = set() 

352 frontier = set([(root_node, depth) for root_node in roots]) 

353 while len(frontier) > 0: 

354 node, node_depth = frontier.pop() 

355 visited.add(node) 

356 if node_depth == 0: 

357 continue 

358 f = self.functions[node] 

359 newNodes = set(f.calls.keys()) - visited 

360 frontier = frontier.union({(new_node, node_depth - 1) for new_node in newNodes}) 

361 subtreeFunctions = {} 

362 for n in visited: 

363 f = self.functions[n] 

364 newCalls = {} 

365 for c in f.calls.keys(): 

366 if c in visited: 

367 newCalls[c] = f.calls[c] 

368 f.calls = newCalls 

369 subtreeFunctions[n] = f 

370 self.functions = subtreeFunctions 

371 

372 def prune_leaf(self, leafs, depth=-1): 

373 edgesUp = collections.defaultdict(set) 

374 for f in self.functions.keys(): 

375 for n in self.functions[f].calls.keys(): 

376 edgesUp[n].add(f) 

377 # build the tree up 

378 visited = set() 

379 frontier = set([(leaf_node, depth) for leaf_node in leafs]) 

380 while len(frontier) > 0: 

381 node, node_depth = frontier.pop() 

382 visited.add(node) 

383 if node_depth == 0: 

384 continue 

385 newNodes = edgesUp[node] - visited 

386 frontier = frontier.union({(new_node, node_depth - 1) for new_node in newNodes}) 

387 downTree = set(self.functions.keys()) 

388 upTree = visited 

389 path = downTree.intersection(upTree) 

390 pathFunctions = {} 

391 for n in path: 

392 f = self.functions[n] 

393 newCalls = {} 

394 for c in f.calls.keys(): 

395 if c in path: 

396 newCalls[c] = f.calls[c] 

397 f.calls = newCalls 

398 pathFunctions[n] = f 

399 self.functions = pathFunctions 

400 

401 def getFunctionIds(self, funcName): 

402 function_names = {v.name: k for (k, v) in self.functions.items()} 

403 return [function_names[name] for name in fnmatch.filter(function_names.keys(), funcName)] 

404 

405 def getFunctionId(self, funcName): 

406 for f in self.functions: 

407 if self.functions[f].name == funcName: 

408 return f 

409 return False 

410 

411 def printFunctionIds(self, selector=None, file=sys.stderr): 

412 """ Print to file function entries selected by fnmatch.fnmatch like in 

413 method getFunctionIds, with following extensions: 

414 - selector starts with "%": dump all information available 

415 - selector is '+' or '-': select all function entries 

416 """ 

417 if selector is None or selector in ("+", "*"): 

418 v = ",\n".join(("%s:\t%s" % (kf,self.functions[kf].name) 

419 for kf in self.functions.keys())) 

420 else: 

421 if selector[0]=="%": 

422 selector=selector[1:] 

423 function_info={k:v for (k,v) 

424 in self.functions.items() 

425 if fnmatch.fnmatch(v.name,selector)} 

426 v = ",\n".join( ("%s\t({k})\t(%s)::\n\t%s" % (v.name,type(v),v.dump()) 

427 for (k,v) in function_info.items() 

428 )) 

429 

430 else: 

431 function_names = (v.name for v in self.functions.values()) 

432 v = ",\n".join( ( nm for nm in fnmatch.filter(function_names,selector ))) 

433 

434 file.write(v+"\n") 

435 file.flush() 

436 

437 class _TarjanData: 

438 def __init__(self, order): 

439 self.order = order 

440 self.lowlink = order 

441 self.onstack = False 

442 

443 def _tarjan(self, function, order, stack, data): 

444 """Tarjan's strongly connected components algorithm. 

445 

446 See also: 

447 - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm 

448 """ 

449 

450 try: 

451 func_data = data[function.id] 

452 return order 

453 except KeyError: 

454 func_data = self._TarjanData(order) 

455 data[function.id] = func_data 

456 order += 1 

457 pos = len(stack) 

458 stack.append(function) 

459 func_data.onstack = True 

460 for call in function.calls.values(): 

461 try: 

462 callee_data = data[call.callee_id] 

463 if callee_data.onstack: 

464 func_data.lowlink = min(func_data.lowlink, callee_data.order) 

465 except KeyError: 

466 callee = self.functions[call.callee_id] 

467 order = self._tarjan(callee, order, stack, data) 

468 callee_data = data[call.callee_id] 

469 func_data.lowlink = min(func_data.lowlink, callee_data.lowlink) 

470 if func_data.lowlink == func_data.order: 

471 # Strongly connected component found 

472 members = stack[pos:] 

473 del stack[pos:] 

474 if len(members) > 1: 

475 cycle = Cycle() 

476 for member in members: 

477 cycle.add_function(member) 

478 data[member.id].onstack = False 

479 else: 

480 for member in members: 

481 data[member.id].onstack = False 

482 return order 

483 

484 def call_ratios(self, event): 

485 # Aggregate for incoming calls 

486 cycle_totals = {} 

487 for cycle in self.cycles: 

488 cycle_totals[cycle] = 0.0 

489 function_totals = {} 

490 for function in self.functions.values(): 

491 function_totals[function] = 0.0 

492 

493 # Pass 1: function_total gets the sum of call[event] for all 

494 # incoming arrows. Same for cycle_total for all arrows 

495 # that are coming into the *cycle* but are not part of it. 

496 for function in self.functions.values(): 

497 for call in function.calls.values(): 

498 if call.callee_id != function.id: 

499 callee = self.functions[call.callee_id] 

500 if event in call.events: 

501 function_totals[callee] += call[event] 

502 if callee.cycle is not None and callee.cycle is not function.cycle: 

503 cycle_totals[callee.cycle] += call[event] 

504 else: 

505 sys.stderr.write("call_ratios: No data for " + function.name + " call to " + callee.name + "\n") 

506 

507 # Pass 2: Compute the ratios. Each call[event] is scaled by the 

508 # function_total of the callee. Calls into cycles use the 

509 # cycle_total, but not calls within cycles. 

510 for function in self.functions.values(): 

511 for call in function.calls.values(): 

512 assert call.ratio is None 

513 if call.callee_id != function.id: 

514 callee = self.functions[call.callee_id] 

515 if event in call.events: 

516 if callee.cycle is not None and callee.cycle is not function.cycle: 

517 total = cycle_totals[callee.cycle] 

518 else: 

519 total = function_totals[callee] 

520 call.ratio = ratio(call[event], total) 

521 else: 

522 # Warnings here would only repeat those issued above. 

523 call.ratio = 0.0 

524 

525 def integrate(self, outevent, inevent): 

526 """Propagate function time ratio along the function calls. 

527 

528 Must be called after finding the cycles. 

529 

530 See also: 

531 - http://citeseer.ist.psu.edu/graham82gprof.html 

532 """ 

533 

534 # Sanity checking 

535 assert outevent not in self 

536 for function in self.functions.values(): 

537 assert outevent not in function 

538 assert inevent in function 

539 for call in function.calls.values(): 

540 assert outevent not in call 

541 if call.callee_id != function.id: 

542 assert call.ratio is not None 

543 

544 # Aggregate the input for each cycle 

545 for cycle in self.cycles: 

546 total = inevent.null() 

547 for function in self.functions.values(): 

548 total = inevent.aggregate(total, function[inevent]) 

549 self[inevent] = total 

550 

551 # Integrate along the edges 

552 total = inevent.null() 

553 for function in self.functions.values(): 

554 total = inevent.aggregate(total, function[inevent]) 

555 self._integrate_function(function, outevent, inevent) 

556 self[outevent] = total 

557 

558 def _integrate_function(self, function, outevent, inevent): 

559 if function.cycle is not None: 

560 return self._integrate_cycle(function.cycle, outevent, inevent) 

561 else: 

562 if outevent not in function: 

563 total = function[inevent] 

564 for call in function.calls.values(): 

565 if call.callee_id != function.id: 

566 total += self._integrate_call(call, outevent, inevent) 

567 function[outevent] = total 

568 return function[outevent] 

569 

570 def _integrate_call(self, call, outevent, inevent): 

571 assert outevent not in call 

572 assert call.ratio is not None 

573 callee = self.functions[call.callee_id] 

574 subtotal = call.ratio *self._integrate_function(callee, outevent, inevent) 

575 call[outevent] = subtotal 

576 return subtotal 

577 

578 def _integrate_cycle(self, cycle, outevent, inevent): 

579 if outevent not in cycle: 

580 

581 # Compute the outevent for the whole cycle 

582 total = inevent.null() 

583 for member in cycle.functions: 

584 subtotal = member[inevent] 

585 for call in member.calls.values(): 

586 callee = self.functions[call.callee_id] 

587 if callee.cycle is not cycle: 

588 subtotal += self._integrate_call(call, outevent, inevent) 

589 total += subtotal 

590 cycle[outevent] = total 

591 

592 # Compute the time propagated to callers of this cycle 

593 callees = {} 

594 for function in self.functions.values(): 

595 if function.cycle is not cycle: 

596 for call in function.calls.values(): 

597 callee = self.functions[call.callee_id] 

598 if callee.cycle is cycle: 

599 try: 

600 callees[callee] += call.ratio 

601 except KeyError: 

602 callees[callee] = call.ratio 

603 

604 for member in cycle.functions: 

605 member[outevent] = outevent.null() 

606 

607 for callee, call_ratio in callees.items(): 

608 ranks = {} 

609 call_ratios = {} 

610 partials = {} 

611 self._rank_cycle_function(cycle, callee, ranks) 

612 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) 

613 partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) 

614 

615 # Ensure `partial == max(partials.values())`, but with round-off tolerance 

616 max_partial = max(partials.values()) 

617 assert abs(partial - max_partial) <= 1e-7*max_partial 

618 

619 assert abs(call_ratio*total - partial) <= 0.001*call_ratio*total 

620 

621 return cycle[outevent] 

622 

623 def _rank_cycle_function(self, cycle, function, ranks): 

624 """Dijkstra's shortest paths algorithm. 

625 

626 See also: 

627 - http://en.wikipedia.org/wiki/Dijkstra's_algorithm 

628 """ 

629 

630 import heapq 

631 Q = [] 

632 Qd = {} 

633 p = {} 

634 visited = set([function]) 

635 

636 ranks[function] = 0 

637 for call in function.calls.values(): 

638 if call.callee_id != function.id: 

639 callee = self.functions[call.callee_id] 

640 if callee.cycle is cycle: 

641 ranks[callee] = 1 

642 item = [ranks[callee], function, callee] 

643 heapq.heappush(Q, item) 

644 Qd[callee] = item 

645 

646 while Q: 

647 cost, parent, member = heapq.heappop(Q) 

648 if member not in visited: 

649 p[member]= parent 

650 visited.add(member) 

651 for call in member.calls.values(): 

652 if call.callee_id != member.id: 

653 callee = self.functions[call.callee_id] 

654 if callee.cycle is cycle: 

655 member_rank = ranks[member] 

656 rank = ranks.get(callee) 

657 if rank is not None: 

658 if rank > 1 + member_rank: 

659 rank = 1 + member_rank 

660 ranks[callee] = rank 

661 Qd_callee = Qd[callee] 

662 Qd_callee[0] = rank 

663 Qd_callee[1] = member 

664 heapq._siftdown(Q, 0, Q.index(Qd_callee)) 

665 else: 

666 rank = 1 + member_rank 

667 ranks[callee] = rank 

668 item = [rank, member, callee] 

669 heapq.heappush(Q, item) 

670 Qd[callee] = item 

671 

672 def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): 

673 if function not in visited: 

674 visited.add(function) 

675 for call in function.calls.values(): 

676 if call.callee_id != function.id: 

677 callee = self.functions[call.callee_id] 

678 if callee.cycle is cycle: 

679 if ranks[callee] > ranks[function]: 

680 call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio 

681 self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited) 

682 

683 def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): 

684 if function not in partials: 

685 partial = partial_ratio*function[inevent] 

686 for call in function.calls.values(): 

687 if call.callee_id != function.id: 

688 callee = self.functions[call.callee_id] 

689 if callee.cycle is not cycle: 

690 assert outevent in call 

691 partial += partial_ratio*call[outevent] 

692 else: 

693 if ranks[callee] > ranks[function]: 

694 callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) 

695 call_ratio = ratio(call.ratio, call_ratios[callee]) 

696 call_partial = call_ratio*callee_partial 

697 try: 

698 call[outevent] += call_partial 

699 except UndefinedEvent: 

700 call[outevent] = call_partial 

701 partial += call_partial 

702 partials[function] = partial 

703 try: 

704 function[outevent] += partial 

705 except UndefinedEvent: 

706 function[outevent] = partial 

707 return partials[function] 

708 

709 def aggregate(self, event): 

710 """Aggregate an event for the whole profile.""" 

711 

712 total = event.null() 

713 for function in self.functions.values(): 

714 try: 

715 total = event.aggregate(total, function[event]) 

716 except UndefinedEvent: 

717 return 

718 self[event] = total 

719 

720 def ratio(self, outevent, inevent): 

721 assert outevent not in self 

722 assert inevent in self 

723 for function in self.functions.values(): 

724 assert outevent not in function 

725 assert inevent in function 

726 function[outevent] = ratio(function[inevent], self[inevent]) 

727 for call in function.calls.values(): 

728 assert outevent not in call 

729 if inevent in call: 

730 call[outevent] = ratio(call[inevent], self[inevent]) 

731 self[outevent] = 1.0 

732 

733 def prune(self, node_thres, edge_thres, paths, color_nodes_by_selftime): 

734 """Prune the profile""" 

735 

736 # compute the prune ratios 

737 for function in self.functions.values(): 

738 try: 

739 function.weight = function[TOTAL_TIME_RATIO] 

740 except UndefinedEvent: 

741 pass 

742 

743 for call in function.calls.values(): 

744 callee = self.functions[call.callee_id] 

745 

746 if TOTAL_TIME_RATIO in call: 

747 # handle exact cases first 

748 call.weight = call[TOTAL_TIME_RATIO] 

749 else: 

750 try: 

751 # make a safe estimate 

752 call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) 

753 except UndefinedEvent: 

754 pass 

755 

756 # prune the nodes 

757 for function_id in list(self.functions.keys()): 

758 function = self.functions[function_id] 

759 if function.weight is not None: 

760 if function.weight < node_thres: 

761 del self.functions[function_id] 

762 

763 # prune file paths 

764 for function_id in list(self.functions.keys()): 

765 function = self.functions[function_id] 

766 if paths and function.filename and not any(function.filename.startswith(path) for path in paths): 

767 del self.functions[function_id] 

768 elif paths and function.module and not any((function.module.find(path)>-1) for path in paths): 

769 del self.functions[function_id] 

770 

771 # prune the edges 

772 for function in self.functions.values(): 

773 for callee_id in list(function.calls.keys()): 

774 call = function.calls[callee_id] 

775 if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres: 

776 del function.calls[callee_id] 

777 

778 if color_nodes_by_selftime: 

779 weights = [] 

780 for function in self.functions.values(): 

781 try: 

782 weights.append(function[TIME_RATIO]) 

783 except UndefinedEvent: 

784 pass 

785 max_ratio = max(weights or [1]) 

786 

787 # apply rescaled weights for coloriung 

788 for function in self.functions.values(): 

789 try: 

790 function.weight = function[TIME_RATIO] / max_ratio 

791 except (ZeroDivisionError, UndefinedEvent): 

792 pass 

793 

794 def dump(self): 

795 for function in self.functions.values(): 

796 sys.stderr.write('Function %s:\n' % (function.name,)) 

797 self._dump_events(function.events) 

798 for call in function.calls.values(): 

799 callee = self.functions[call.callee_id] 

800 sys.stderr.write(' Call %s:\n' % (callee.name,)) 

801 self._dump_events(call.events) 

802 for cycle in self.cycles: 

803 sys.stderr.write('Cycle:\n') 

804 self._dump_events(cycle.events) 

805 for function in cycle.functions: 

806 sys.stderr.write(' Function %s\n' % (function.name,)) 

807 

808 def _dump_events(self, events): 

809 for event, value in events.items(): 

810 sys.stderr.write(' %s: %s\n' % (event.name, event.format(value))) 

811 

812 

813 

814######################################################################## 

815# Parsers 

816 

817 

818class Struct: 

819 """Masquerade a dictionary with a structure-like behavior.""" 

820 

821 def __init__(self, attrs = None): 

822 if attrs is None: 

823 attrs = {} 

824 self.__dict__['_attrs'] = attrs 

825 

826 def __getattr__(self, name): 

827 try: 

828 return self._attrs[name] 

829 except KeyError: 

830 raise AttributeError(name) 

831 

832 def __setattr__(self, name, value): 

833 self._attrs[name] = value 

834 

835 def __str__(self): 

836 return str(self._attrs) 

837 

838 def __repr__(self): 

839 return repr(self._attrs) 

840 

841 

842class ParseError(Exception): 

843 """Raised when parsing to signal mismatches.""" 

844 

845 def __init__(self, msg, line): 

846 Exception.__init__(self) 

847 self.msg = msg 

848 # TODO: store more source line information 

849 self.line = line 

850 

851 def __str__(self): 

852 return '%s: %r' % (self.msg, self.line) 

853 

854 

855class Parser: 

856 """Parser interface.""" 

857 

858 stdinInput = True 

859 multipleInput = False 

860 

861 def __init__(self): 

862 pass 

863 

864 def parse(self): 

865 raise NotImplementedError 

866 

867 

868class JsonParser(Parser): 

869 """Parser for a custom JSON representation of profile data. 

870 

871 See schema.json for details. 

872 """ 

873 

874 

875 def __init__(self, stream): 

876 Parser.__init__(self) 

877 self.stream = stream 

878 

879 def parse(self): 

880 

881 obj = json.load(self.stream) 

882 

883 assert obj['version'] == 0 

884 

885 profile = Profile() 

886 profile[SAMPLES] = 0 

887 

888 fns = obj['functions'] 

889 

890 for functionIndex in range(len(fns)): 

891 fn = fns[functionIndex] 

892 function = Function(functionIndex, fn['name']) 

893 try: 

894 function.module = fn['module'] 

895 except KeyError: 

896 pass 

897 try: 

898 function.process = fn['process'] 

899 except KeyError: 

900 pass 

901 function[SAMPLES] = 0 

902 function.called = 0 

903 profile.add_function(function) 

904 

905 for event in obj['events']: 

906 callchain = [] 

907 

908 for functionIndex in event['callchain']: 

909 function = profile.functions[functionIndex] 

910 callchain.append(function) 

911 

912 # increment the call count of the first in the callchain 

913 function = profile.functions[event['callchain'][0]] 

914 function.called = function.called + 1 

915 

916 cost = event['cost'][0] 

917 

918 callee = callchain[0] 

919 callee[SAMPLES] += cost 

920 profile[SAMPLES] += cost 

921 

922 for caller in callchain[1:]: 

923 try: 

924 call = caller.calls[callee.id] 

925 except KeyError: 

926 call = Call(callee.id) 

927 call[SAMPLES2] = cost 

928 caller.add_call(call) 

929 else: 

930 call[SAMPLES2] += cost 

931 

932 callee = caller 

933 

934 if False: 

935 profile.dump() 

936 

937 # compute derived data 

938 profile.validate() 

939 profile.find_cycles() 

940 profile.ratio(TIME_RATIO, SAMPLES) 

941 profile.call_ratios(SAMPLES2) 

942 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

943 

944 return profile 

945 

946 

947class LineParser(Parser): 

948 """Base class for parsers that read line-based formats.""" 

949 

950 def __init__(self, stream): 

951 Parser.__init__(self) 

952 self._stream = stream 

953 self.__line = None 

954 self.__eof = False 

955 self.line_no = 0 

956 

957 def readline(self): 

958 line = self._stream.readline() 

959 if not line: 

960 self.__line = '' 

961 self.__eof = True 

962 else: 

963 self.line_no += 1 

964 line = line.rstrip('\r\n') 

965 self.__line = line 

966 

967 def lookahead(self): 

968 assert self.__line is not None 

969 return self.__line 

970 

971 def consume(self): 

972 assert self.__line is not None 

973 line = self.__line 

974 self.readline() 

975 return line 

976 

977 def eof(self): 

978 assert self.__line is not None 

979 return self.__eof 

980 

981 

982XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4) 

983 

984 

985class XmlToken: 

986 

987 def __init__(self, type, name_or_data, attrs = None, line = None, column = None): 

988 assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) 

989 self.type = type 

990 self.name_or_data = name_or_data 

991 self.attrs = attrs 

992 self.line = line 

993 self.column = column 

994 

995 def __str__(self): 

996 if self.type == XML_ELEMENT_START: 

997 return '<' + self.name_or_data + ' ...>' 

998 if self.type == XML_ELEMENT_END: 

999 return '</' + self.name_or_data + '>' 

1000 if self.type == XML_CHARACTER_DATA: 

1001 return self.name_or_data 

1002 if self.type == XML_EOF: 

1003 return 'end of file' 

1004 assert 0 

1005 

1006 

1007class XmlTokenizer: 

1008 """Expat based XML tokenizer.""" 

1009 

1010 def __init__(self, fp, skip_ws = True): 

1011 self.fp = fp 

1012 self.tokens = [] 

1013 self.index = 0 

1014 self.final = False 

1015 self.skip_ws = skip_ws 

1016 

1017 self.character_pos = 0, 0 

1018 self.character_data = '' 

1019 

1020 self.parser = xml.parsers.expat.ParserCreate() 

1021 self.parser.StartElementHandler = self.handle_element_start 

1022 self.parser.EndElementHandler = self.handle_element_end 

1023 self.parser.CharacterDataHandler = self.handle_character_data 

1024 

1025 def handle_element_start(self, name, attributes): 

1026 self.finish_character_data() 

1027 line, column = self.pos() 

1028 token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) 

1029 self.tokens.append(token) 

1030 

1031 def handle_element_end(self, name): 

1032 self.finish_character_data() 

1033 line, column = self.pos() 

1034 token = XmlToken(XML_ELEMENT_END, name, None, line, column) 

1035 self.tokens.append(token) 

1036 

1037 def handle_character_data(self, data): 

1038 if not self.character_data: 

1039 self.character_pos = self.pos() 

1040 self.character_data += data 

1041 

1042 def finish_character_data(self): 

1043 if self.character_data: 

1044 if not self.skip_ws or not self.character_data.isspace(): 

1045 line, column = self.character_pos 

1046 token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) 

1047 self.tokens.append(token) 

1048 self.character_data = '' 

1049 

1050 def next(self): 

1051 size = 16*1024 

1052 while self.index >= len(self.tokens) and not self.final: 

1053 self.tokens = [] 

1054 self.index = 0 

1055 data = self.fp.read(size) 

1056 self.final = len(data) < size 

1057 self.parser.Parse(data, self.final) 

1058 if self.index >= len(self.tokens): 

1059 line, column = self.pos() 

1060 token = XmlToken(XML_EOF, None, None, line, column) 

1061 else: 

1062 token = self.tokens[self.index] 

1063 self.index += 1 

1064 return token 

1065 

1066 def pos(self): 

1067 return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber 

1068 

1069 

1070class XmlTokenMismatch(Exception): 

1071 

1072 def __init__(self, expected, found): 

1073 Exception.__init__(self) 

1074 self.expected = expected 

1075 self.found = found 

1076 

1077 def __str__(self): 

1078 return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found)) 

1079 

1080 

1081class XmlParser(Parser): 

1082 """Base XML document parser.""" 

1083 

1084 def __init__(self, fp): 

1085 Parser.__init__(self) 

1086 self.tokenizer = XmlTokenizer(fp) 

1087 self.consume() 

1088 

1089 def consume(self): 

1090 self.token = self.tokenizer.next() 

1091 

1092 def match_element_start(self, name): 

1093 return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name 

1094 

1095 def match_element_end(self, name): 

1096 return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name 

1097 

1098 def element_start(self, name): 

1099 while self.token.type == XML_CHARACTER_DATA: 

1100 self.consume() 

1101 if self.token.type != XML_ELEMENT_START: 

1102 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) 

1103 if self.token.name_or_data != name: 

1104 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) 

1105 attrs = self.token.attrs 

1106 self.consume() 

1107 return attrs 

1108 

1109 def element_end(self, name): 

1110 while self.token.type == XML_CHARACTER_DATA: 

1111 self.consume() 

1112 if self.token.type != XML_ELEMENT_END: 

1113 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) 

1114 if self.token.name_or_data != name: 

1115 raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) 

1116 self.consume() 

1117 

1118 def character_data(self, strip = True): 

1119 data = '' 

1120 while self.token.type == XML_CHARACTER_DATA: 

1121 data += self.token.name_or_data 

1122 self.consume() 

1123 if strip: 

1124 data = data.strip() 

1125 return data 

1126 

1127 

1128class GprofParser(Parser): 

1129 """Parser for GNU gprof output. 

1130 

1131 See also: 

1132 - Chapter "Interpreting gprof's Output" from the GNU gprof manual 

1133 http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph 

1134 - File "cg_print.c" from the GNU gprof source code 

1135 http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src 

1136 """ 

1137 

1138 def __init__(self, fp): 

1139 Parser.__init__(self) 

1140 self.fp = fp 

1141 self.functions = {} 

1142 self.cycles = {} 

1143 

1144 def readline(self): 

1145 line = self.fp.readline() 

1146 if not line: 

1147 sys.stderr.write('error: unexpected end of file\n') 

1148 sys.exit(1) 

1149 line = line.rstrip('\r\n') 

1150 return line 

1151 

1152 _int_re = re.compile(r'^\d+$') 

1153 _float_re = re.compile(r'^\d+\.\d+$') 

1154 

1155 def translate(self, mo): 

1156 """Extract a structure from a match object, while translating the types in the process.""" 

1157 attrs = {} 

1158 groupdict = mo.groupdict() 

1159 for name, value in groupdict.items(): 

1160 if value is None: 

1161 value = None 

1162 elif self._int_re.match(value): 

1163 value = int(value) 

1164 elif self._float_re.match(value): 

1165 value = float(value) 

1166 attrs[name] = (value) 

1167 return Struct(attrs) 

1168 

1169 _cg_header_re = re.compile( 

1170 # original gprof header 

1171 r'^\s+called/total\s+parents\s*$|' + 

1172 r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + 

1173 r'^\s+called/total\s+children\s*$|' + 

1174 # GNU gprof header 

1175 r'^index\s+%\s+(time\s+)?self\s+children\s+called\s+name\s*$' 

1176 ) 

1177 

1178 _cg_ignore_re = re.compile( 

1179 # spontaneous 

1180 r'^\s+<spontaneous>\s*$|' 

1181 # internal calls (such as "mcount") 

1182 r'^.*\((\d+)\)$' 

1183 ) 

1184 

1185 _cg_primary_re = re.compile( 

1186 r'^\[(?P<index>\d+)\]?' + 

1187 r'\s+(?P<percentage_time>\d+\.\d+)' + 

1188 r'\s+(?P<self>\d+\.\d+)' + 

1189 r'\s+(?P<descendants>\d+\.\d+)' + 

1190 r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + 

1191 r'\s+(?P<name>\S.*?)' + 

1192 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 

1193 r'\s\[(\d+)\]$' 

1194 ) 

1195 

1196 _cg_parent_re = re.compile( 

1197 r'^\s+(?P<self>\d+\.\d+)?' + 

1198 r'\s+(?P<descendants>\d+\.\d+)?' + 

1199 r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' + 

1200 r'\s+(?P<name>\S.*?)' + 

1201 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 

1202 r'\s\[(?P<index>\d+)\]$' 

1203 ) 

1204 

1205 _cg_child_re = _cg_parent_re 

1206 

1207 _cg_cycle_header_re = re.compile( 

1208 r'^\[(?P<index>\d+)\]?' + 

1209 r'\s+(?P<percentage_time>\d+\.\d+)' + 

1210 r'\s+(?P<self>\d+\.\d+)' + 

1211 r'\s+(?P<descendants>\d+\.\d+)' + 

1212 r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' + 

1213 r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' + 

1214 r'\s\[(\d+)\]$' 

1215 ) 

1216 

1217 _cg_cycle_member_re = re.compile( 

1218 r'^\s+(?P<self>\d+\.\d+)?' + 

1219 r'\s+(?P<descendants>\d+\.\d+)?' + 

1220 r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' + 

1221 r'\s+(?P<name>\S.*?)' + 

1222 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 

1223 r'\s\[(?P<index>\d+)\]$' 

1224 ) 

1225 

1226 _cg_sep_re = re.compile(r'^--+$') 

1227 

1228 def parse_function_entry(self, lines): 

1229 parents = [] 

1230 children = [] 

1231 

1232 while True: 

1233 if not lines: 

1234 sys.stderr.write('warning: unexpected end of entry\n') 

1235 line = lines.pop(0) 

1236 if line.startswith('['): 

1237 break 

1238 

1239 # read function parent line 

1240 mo = self._cg_parent_re.match(line) 

1241 if not mo: 

1242 if self._cg_ignore_re.match(line): 

1243 continue 

1244 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 

1245 else: 

1246 parent = self.translate(mo) 

1247 parents.append(parent) 

1248 

1249 # read primary line 

1250 mo = self._cg_primary_re.match(line) 

1251 if not mo: 

1252 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 

1253 return 

1254 else: 

1255 function = self.translate(mo) 

1256 

1257 while lines: 

1258 line = lines.pop(0) 

1259 

1260 # read function subroutine line 

1261 mo = self._cg_child_re.match(line) 

1262 if not mo: 

1263 if self._cg_ignore_re.match(line): 

1264 continue 

1265 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 

1266 else: 

1267 child = self.translate(mo) 

1268 children.append(child) 

1269 

1270 function.parents = parents 

1271 function.children = children 

1272 

1273 self.functions[function.index] = function 

1274 

1275 def parse_cycle_entry(self, lines): 

1276 

1277 # read cycle header line 

1278 line = lines[0] 

1279 mo = self._cg_cycle_header_re.match(line) 

1280 if not mo: 

1281 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 

1282 return 

1283 cycle = self.translate(mo) 

1284 

1285 # read cycle member lines 

1286 cycle.functions = [] 

1287 for line in lines[1:]: 

1288 mo = self._cg_cycle_member_re.match(line) 

1289 if not mo: 

1290 sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) 

1291 continue 

1292 call = self.translate(mo) 

1293 cycle.functions.append(call) 

1294 

1295 self.cycles[cycle.cycle] = cycle 

1296 

1297 def parse_cg_entry(self, lines): 

1298 if lines[0].startswith("["): 

1299 self.parse_cycle_entry(lines) 

1300 else: 

1301 self.parse_function_entry(lines) 

1302 

1303 def parse_cg(self): 

1304 """Parse the call graph.""" 

1305 

1306 # skip call graph header 

1307 while not self._cg_header_re.match(self.readline()): 

1308 pass 

1309 line = self.readline() 

1310 while self._cg_header_re.match(line): 

1311 line = self.readline() 

1312 

1313 # process call graph entries 

1314 entry_lines = [] 

1315 while line != '\014': # form feed 

1316 if line and not line.isspace(): 

1317 if self._cg_sep_re.match(line): 

1318 self.parse_cg_entry(entry_lines) 

1319 entry_lines = [] 

1320 else: 

1321 entry_lines.append(line) 

1322 line = self.readline() 

1323 

1324 def parse(self): 

1325 self.parse_cg() 

1326 self.fp.close() 

1327 

1328 profile = Profile() 

1329 profile[TIME] = 0.0 

1330 

1331 cycles = {} 

1332 for index in self.cycles: 

1333 cycles[index] = Cycle() 

1334 

1335 for entry in self.functions.values(): 

1336 # populate the function 

1337 function = Function(entry.index, entry.name) 

1338 function[TIME] = entry.self 

1339 if entry.called is not None: 

1340 function.called = entry.called 

1341 if entry.called_self is not None: 

1342 call = Call(entry.index) 

1343 call[CALLS] = entry.called_self 

1344 function.called += entry.called_self 

1345 

1346 # populate the function calls 

1347 for child in entry.children: 

1348 call = Call(child.index) 

1349 

1350 assert child.called is not None 

1351 call[CALLS] = child.called 

1352 

1353 if child.index not in self.functions: 

1354 # NOTE: functions that were never called but were discovered by gprof's 

1355 # static call graph analysis dont have a call graph entry so we need 

1356 # to add them here 

1357 missing = Function(child.index, child.name) 

1358 function[TIME] = 0.0 

1359 function.called = 0 

1360 profile.add_function(missing) 

1361 

1362 function.add_call(call) 

1363 

1364 profile.add_function(function) 

1365 

1366 if entry.cycle is not None: 

1367 try: 

1368 cycle = cycles[entry.cycle] 

1369 except KeyError: 

1370 sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle) 

1371 cycle = Cycle() 

1372 cycles[entry.cycle] = cycle 

1373 cycle.add_function(function) 

1374 

1375 profile[TIME] = profile[TIME] + function[TIME] 

1376 

1377 for cycle in cycles.values(): 

1378 profile.add_cycle(cycle) 

1379 

1380 # Compute derived events 

1381 profile.validate() 

1382 profile.ratio(TIME_RATIO, TIME) 

1383 profile.call_ratios(CALLS) 

1384 profile.integrate(TOTAL_TIME, TIME) 

1385 profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) 

1386 

1387 return profile 

1388 

1389 

1390# Clone&hack of GprofParser for VTune Amplifier XE 2013 gprof-cc output. 

1391# Tested only with AXE 2013 for Windows. 

1392# - Use total times as reported by AXE. 

1393# - In the absence of call counts, call ratios are faked from the relative 

1394# proportions of total time. This affects only the weighting of the calls. 

1395# - Different header, separator, and end marker. 

1396# - Extra whitespace after function names. 

1397# - You get a full entry for <spontaneous>, which does not have parents. 

1398# - Cycles do have parents. These are saved but unused (as they are 

1399# for functions). 

1400# - Disambiguated "unrecognized call graph entry" error messages. 

1401# Notes: 

1402# - Total time of functions as reported by AXE passes the val3 test. 

1403# - CPU Time:Children in the input is sometimes a negative number. This 

1404# value goes to the variable descendants, which is unused. 

1405# - The format of gprof-cc reports is unaffected by the use of 

1406# -knob enable-call-counts=true (no call counts, ever), or 

1407# -show-as=samples (results are quoted in seconds regardless). 

1408class AXEParser(Parser): 

1409 "Parser for VTune Amplifier XE 2013 gprof-cc report output." 

1410 

1411 def __init__(self, fp): 

1412 Parser.__init__(self) 

1413 self.fp = fp 

1414 self.functions = {} 

1415 self.cycles = {} 

1416 

1417 def readline(self): 

1418 line = self.fp.readline() 

1419 if not line: 

1420 sys.stderr.write('error: unexpected end of file\n') 

1421 sys.exit(1) 

1422 line = line.rstrip('\r\n') 

1423 return line 

1424 

1425 _int_re = re.compile(r'^\d+$') 

1426 _float_re = re.compile(r'^\d+\.\d+$') 

1427 

1428 def translate(self, mo): 

1429 """Extract a structure from a match object, while translating the types in the process.""" 

1430 attrs = {} 

1431 groupdict = mo.groupdict() 

1432 for name, value in groupdict.items(): 

1433 if value is None: 

1434 value = None 

1435 elif self._int_re.match(value): 

1436 value = int(value) 

1437 elif self._float_re.match(value): 

1438 value = float(value) 

1439 attrs[name] = (value) 

1440 return Struct(attrs) 

1441 

1442 _cg_header_re = re.compile( 

1443 '^Index |' 

1444 '^-----+ ' 

1445 ) 

1446 

1447 _cg_footer_re = re.compile(r'^Index\s+Function\s*$') 

1448 

1449 _cg_primary_re = re.compile( 

1450 r'^\[(?P<index>\d+)\]?' + 

1451 r'\s+(?P<percentage_time>\d+\.\d+)' + 

1452 r'\s+(?P<self>\d+\.\d+)' + 

1453 r'\s+(?P<descendants>\d+\.\d+)' + 

1454 r'\s+(?P<name>\S.*?)' + 

1455 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 

1456 r'\s+\[(\d+)\]' + 

1457 r'\s*$' 

1458 ) 

1459 

1460 _cg_parent_re = re.compile( 

1461 r'^\s+(?P<self>\d+\.\d+)?' + 

1462 r'\s+(?P<descendants>\d+\.\d+)?' + 

1463 r'\s+(?P<name>\S.*?)' + 

1464 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 

1465 r'(?:\s+\[(?P<index>\d+)\]\s*)?' + 

1466 r'\s*$' 

1467 ) 

1468 

1469 _cg_child_re = _cg_parent_re 

1470 

1471 _cg_cycle_header_re = re.compile( 

1472 r'^\[(?P<index>\d+)\]?' + 

1473 r'\s+(?P<percentage_time>\d+\.\d+)' + 

1474 r'\s+(?P<self>\d+\.\d+)' + 

1475 r'\s+(?P<descendants>\d+\.\d+)' + 

1476 r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' + 

1477 r'\s+\[(\d+)\]' + 

1478 r'\s*$' 

1479 ) 

1480 

1481 _cg_cycle_member_re = re.compile( 

1482 r'^\s+(?P<self>\d+\.\d+)?' + 

1483 r'\s+(?P<descendants>\d+\.\d+)?' + 

1484 r'\s+(?P<name>\S.*?)' + 

1485 r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' + 

1486 r'\s+\[(?P<index>\d+)\]' + 

1487 r'\s*$' 

1488 ) 

1489 

1490 def parse_function_entry(self, lines): 

1491 parents = [] 

1492 children = [] 

1493 

1494 while True: 

1495 if not lines: 

1496 sys.stderr.write('warning: unexpected end of entry\n') 

1497 return 

1498 line = lines.pop(0) 

1499 if line.startswith('['): 

1500 break 

1501 

1502 # read function parent line 

1503 mo = self._cg_parent_re.match(line) 

1504 if not mo: 

1505 sys.stderr.write('warning: unrecognized call graph entry (1): %r\n' % line) 

1506 else: 

1507 parent = self.translate(mo) 

1508 if parent.name != '<spontaneous>': 

1509 parents.append(parent) 

1510 

1511 # read primary line 

1512 mo = self._cg_primary_re.match(line) 

1513 if not mo: 

1514 sys.stderr.write('warning: unrecognized call graph entry (2): %r\n' % line) 

1515 return 

1516 else: 

1517 function = self.translate(mo) 

1518 

1519 while lines: 

1520 line = lines.pop(0) 

1521 

1522 # read function subroutine line 

1523 mo = self._cg_child_re.match(line) 

1524 if not mo: 

1525 sys.stderr.write('warning: unrecognized call graph entry (3): %r\n' % line) 

1526 else: 

1527 child = self.translate(mo) 

1528 if child.name != '<spontaneous>': 

1529 children.append(child) 

1530 

1531 if function.name != '<spontaneous>': 

1532 function.parents = parents 

1533 function.children = children 

1534 

1535 self.functions[function.index] = function 

1536 

1537 def parse_cycle_entry(self, lines): 

1538 

1539 # Process the parents that were not there in gprof format. 

1540 parents = [] 

1541 while True: 

1542 if not lines: 

1543 sys.stderr.write('warning: unexpected end of cycle entry\n') 

1544 return 

1545 line = lines.pop(0) 

1546 if line.startswith('['): 

1547 break 

1548 mo = self._cg_parent_re.match(line) 

1549 if not mo: 

1550 sys.stderr.write('warning: unrecognized call graph entry (6): %r\n' % line) 

1551 else: 

1552 parent = self.translate(mo) 

1553 if parent.name != '<spontaneous>': 

1554 parents.append(parent) 

1555 

1556 # read cycle header line 

1557 mo = self._cg_cycle_header_re.match(line) 

1558 if not mo: 

1559 sys.stderr.write('warning: unrecognized call graph entry (4): %r\n' % line) 

1560 return 

1561 cycle = self.translate(mo) 

1562 

1563 # read cycle member lines 

1564 cycle.functions = [] 

1565 for line in lines[1:]: 

1566 mo = self._cg_cycle_member_re.match(line) 

1567 if not mo: 

1568 sys.stderr.write('warning: unrecognized call graph entry (5): %r\n' % line) 

1569 continue 

1570 call = self.translate(mo) 

1571 cycle.functions.append(call) 

1572 

1573 cycle.parents = parents 

1574 self.cycles[cycle.cycle] = cycle 

1575 

1576 def parse_cg_entry(self, lines): 

1577 if any("as a whole" in linelooper for linelooper in lines): 

1578 self.parse_cycle_entry(lines) 

1579 else: 

1580 self.parse_function_entry(lines) 

1581 

1582 def parse_cg(self): 

1583 """Parse the call graph.""" 

1584 

1585 # skip call graph header 

1586 line = self.readline() 

1587 while self._cg_header_re.match(line): 

1588 line = self.readline() 

1589 

1590 # process call graph entries 

1591 entry_lines = [] 

1592 # An EOF in readline terminates the program without returning. 

1593 while not self._cg_footer_re.match(line): 

1594 if line.isspace(): 

1595 self.parse_cg_entry(entry_lines) 

1596 entry_lines = [] 

1597 else: 

1598 entry_lines.append(line) 

1599 line = self.readline() 

1600 

1601 def parse(self): 

1602 sys.stderr.write('warning: for axe format, edge weights are unreliable estimates derived from function total times.\n') 

1603 self.parse_cg() 

1604 self.fp.close() 

1605 

1606 profile = Profile() 

1607 profile[TIME] = 0.0 

1608 

1609 cycles = {} 

1610 for index in self.cycles: 

1611 cycles[index] = Cycle() 

1612 

1613 for entry in self.functions.values(): 

1614 # populate the function 

1615 function = Function(entry.index, entry.name) 

1616 function[TIME] = entry.self 

1617 function[TOTAL_TIME_RATIO] = entry.percentage_time / 100.0 

1618 

1619 # populate the function calls 

1620 for child in entry.children: 

1621 call = Call(child.index) 

1622 # The following bogus value affects only the weighting of 

1623 # the calls. 

1624 call[TOTAL_TIME_RATIO] = function[TOTAL_TIME_RATIO] 

1625 

1626 if child.index not in self.functions: 

1627 # NOTE: functions that were never called but were discovered by gprof's 

1628 # static call graph analysis dont have a call graph entry so we need 

1629 # to add them here 

1630 # FIXME: Is this applicable? 

1631 missing = Function(child.index, child.name) 

1632 function[TIME] = 0.0 

1633 profile.add_function(missing) 

1634 

1635 function.add_call(call) 

1636 

1637 profile.add_function(function) 

1638 

1639 if entry.cycle is not None: 

1640 try: 

1641 cycle = cycles[entry.cycle] 

1642 except KeyError: 

1643 sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle) 

1644 cycle = Cycle() 

1645 cycles[entry.cycle] = cycle 

1646 cycle.add_function(function) 

1647 

1648 profile[TIME] = profile[TIME] + function[TIME] 

1649 

1650 for cycle in cycles.values(): 

1651 profile.add_cycle(cycle) 

1652 

1653 # Compute derived events. 

1654 profile.validate() 

1655 profile.ratio(TIME_RATIO, TIME) 

1656 # Lacking call counts, fake call ratios based on total times. 

1657 profile.call_ratios(TOTAL_TIME_RATIO) 

1658 # The TOTAL_TIME_RATIO of functions is already set. Propagate that 

1659 # total time to the calls. (TOTAL_TIME is neither set nor used.) 

1660 for function in profile.functions.values(): 

1661 for call in function.calls.values(): 

1662 if call.ratio is not None: 

1663 callee = profile.functions[call.callee_id] 

1664 call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] 

1665 

1666 return profile 

1667 

1668 

1669class CallgrindParser(LineParser): 

1670 """Parser for valgrind's callgrind tool. 

1671 

1672 See also: 

1673 - https://valgrind.org/docs/manual/cl-format.html 

1674 """ 

1675 

1676 _call_re = re.compile(r'^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$') 

1677 

1678 def __init__(self, infile): 

1679 LineParser.__init__(self, infile) 

1680 

1681 # Textual positions 

1682 self.position_ids = {} 

1683 self.positions = {} 

1684 

1685 # Numeric positions 

1686 self.num_positions = 1 

1687 self.cost_positions = ['line'] 

1688 self.last_positions = [0] 

1689 

1690 # Events 

1691 self.num_events = 0 

1692 self.cost_events = [] 

1693 

1694 self.profile = Profile() 

1695 self.profile[SAMPLES] = 0 

1696 

1697 def parse(self): 

1698 # read lookahead 

1699 self.readline() 

1700 

1701 self.parse_key('version') 

1702 self.parse_key('creator') 

1703 while self.parse_part(): 

1704 pass 

1705 if not self.eof(): 

1706 sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no) 

1707 sys.stderr.write('%s\n' % self.lookahead()) 

1708 

1709 # compute derived data 

1710 self.profile.validate() 

1711 self.profile.find_cycles() 

1712 self.profile.ratio(TIME_RATIO, SAMPLES) 

1713 self.profile.call_ratios(SAMPLES2) 

1714 self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

1715 

1716 return self.profile 

1717 

1718 def parse_part(self): 

1719 if not self.parse_header_line(): 

1720 return False 

1721 while self.parse_header_line(): 

1722 pass 

1723 if not self.parse_body_line(): 

1724 return False 

1725 while self.parse_body_line(): 

1726 pass 

1727 return True 

1728 

1729 def parse_header_line(self): 

1730 return \ 

1731 self.parse_empty() or \ 

1732 self.parse_comment() or \ 

1733 self.parse_part_detail() or \ 

1734 self.parse_description() or \ 

1735 self.parse_event_specification() or \ 

1736 self.parse_cost_line_def() or \ 

1737 self.parse_cost_summary() 

1738 

1739 _detail_keys = set(('cmd', 'pid', 'thread', 'part')) 

1740 

1741 def parse_part_detail(self): 

1742 return self.parse_keys(self._detail_keys) 

1743 

1744 def parse_description(self): 

1745 return self.parse_key('desc') is not None 

1746 

1747 def parse_event_specification(self): 

1748 event = self.parse_key('event') 

1749 if event is None: 

1750 return False 

1751 return True 

1752 

1753 def parse_cost_line_def(self): 

1754 pair = self.parse_keys(('events', 'positions')) 

1755 if pair is None: 

1756 return False 

1757 key, value = pair 

1758 items = value.split() 

1759 if key == 'events': 

1760 self.num_events = len(items) 

1761 self.cost_events = items 

1762 if key == 'positions': 

1763 self.num_positions = len(items) 

1764 self.cost_positions = items 

1765 self.last_positions = [0]*self.num_positions 

1766 return True 

1767 

1768 def parse_cost_summary(self): 

1769 pair = self.parse_keys(('summary', 'totals')) 

1770 if pair is None: 

1771 return False 

1772 return True 

1773 

1774 def parse_body_line(self): 

1775 return \ 

1776 self.parse_empty() or \ 

1777 self.parse_comment() or \ 

1778 self.parse_cost_line() or \ 

1779 self.parse_position_spec() or \ 

1780 self.parse_association_spec() 

1781 

1782 __subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)' 

1783 _cost_re = re.compile(r'^' + 

1784 __subpos_re + r'( +' + __subpos_re + r')*' + 

1785 r'( +\d+)*' + 

1786 '$') 

1787 

1788 def parse_cost_line(self, calls=None): 

1789 line = self.lookahead().rstrip() 

1790 mo = self._cost_re.match(line) 

1791 if not mo: 

1792 return False 

1793 

1794 function = self.get_function() 

1795 

1796 if calls is None: 

1797 # Unlike other aspects, call object (cob) is relative not to the 

1798 # last call object, but to the caller's object (ob), so try to 

1799 # update it when processing a functions cost line 

1800 try: 

1801 self.positions['cob'] = self.positions['ob'] 

1802 except KeyError: 

1803 pass 

1804 

1805 values = line.split() 

1806 assert len(values) <= self.num_positions + self.num_events 

1807 

1808 positions = values[0 : self.num_positions] 

1809 events = values[self.num_positions : ] 

1810 events += ['0']*(self.num_events - len(events)) 

1811 

1812 for i in range(self.num_positions): 

1813 position = positions[i] 

1814 if position == '*': 

1815 position = self.last_positions[i] 

1816 elif position[0] in '-+': 

1817 position = self.last_positions[i] + int(position) 

1818 elif position.startswith('0x'): 

1819 position = int(position, 16) 

1820 else: 

1821 position = int(position) 

1822 self.last_positions[i] = position 

1823 

1824 events = [float(event) for event in events] 

1825 

1826 if calls is None: 

1827 function[SAMPLES] += events[0] 

1828 self.profile[SAMPLES] += events[0] 

1829 else: 

1830 callee = self.get_callee() 

1831 callee.called += calls 

1832 

1833 try: 

1834 call = function.calls[callee.id] 

1835 except KeyError: 

1836 call = Call(callee.id) 

1837 call[CALLS] = calls 

1838 call[SAMPLES2] = events[0] 

1839 function.add_call(call) 

1840 else: 

1841 call[CALLS] += calls 

1842 call[SAMPLES2] += events[0] 

1843 

1844 self.consume() 

1845 return True 

1846 

1847 def parse_association_spec(self): 

1848 line = self.lookahead() 

1849 if not line.startswith('calls='): 

1850 return False 

1851 

1852 _, values = line.split('=', 1) 

1853 values = values.strip().split() 

1854 calls = int(values[0]) 

1855 call_position = values[1:] 

1856 self.consume() 

1857 

1858 self.parse_cost_line(calls) 

1859 

1860 return True 

1861 

1862 _position_re = re.compile(r'^(?P<position>[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?') 

1863 

1864 _position_table_map = { 

1865 'ob': 'ob', 

1866 'fl': 'fl', 

1867 'fi': 'fl', 

1868 'fe': 'fl', 

1869 'fn': 'fn', 

1870 'cob': 'ob', 

1871 'cfl': 'fl', 

1872 'cfi': 'fl', 

1873 'cfe': 'fl', 

1874 'cfn': 'fn', 

1875 'jfi': 'fl', 

1876 } 

1877 

1878 _position_map = { 

1879 'ob': 'ob', 

1880 'fl': 'fl', 

1881 'fi': 'fl', 

1882 'fe': 'fl', 

1883 'fn': 'fn', 

1884 'cob': 'cob', 

1885 'cfl': 'cfl', 

1886 'cfi': 'cfl', 

1887 'cfe': 'cfl', 

1888 'cfn': 'cfn', 

1889 'jfi': 'jfi', 

1890 } 

1891 

1892 def parse_position_spec(self): 

1893 line = self.lookahead() 

1894 

1895 if line.startswith('jump=') or line.startswith('jcnd='): 

1896 self.consume() 

1897 return True 

1898 

1899 mo = self._position_re.match(line) 

1900 if not mo: 

1901 return False 

1902 

1903 position, id, name = mo.groups() 

1904 if id: 

1905 table = self._position_table_map[position] 

1906 if name: 

1907 self.position_ids[(table, id)] = name 

1908 else: 

1909 name = self.position_ids.get((table, id), '') 

1910 self.positions[self._position_map[position]] = name 

1911 

1912 self.consume() 

1913 return True 

1914 

1915 def parse_empty(self): 

1916 if self.eof(): 

1917 return False 

1918 line = self.lookahead() 

1919 if line.strip(): 

1920 return False 

1921 self.consume() 

1922 return True 

1923 

1924 def parse_comment(self): 

1925 line = self.lookahead() 

1926 if not line.startswith('#'): 

1927 return False 

1928 self.consume() 

1929 return True 

1930 

1931 _key_re = re.compile(r'^(\w+):') 

1932 

1933 def parse_key(self, key): 

1934 pair = self.parse_keys((key,)) 

1935 if not pair: 

1936 return None 

1937 key, value = pair 

1938 return value 

1939 

1940 def parse_keys(self, keys): 

1941 line = self.lookahead() 

1942 mo = self._key_re.match(line) 

1943 if not mo: 

1944 return None 

1945 key, value = line.split(':', 1) 

1946 if key not in keys: 

1947 return None 

1948 value = value.strip() 

1949 self.consume() 

1950 return key, value 

1951 

1952 def make_function(self, module, filename, name): 

1953 # FIXME: module and filename are not being tracked reliably 

1954 #id = '|'.join((module, filename, name)) 

1955 id = name 

1956 try: 

1957 function = self.profile.functions[id] 

1958 except KeyError: 

1959 function = Function(id, name) 

1960 if module: 

1961 function.module = os.path.basename(module) 

1962 function[SAMPLES] = 0 

1963 function.called = 0 

1964 self.profile.add_function(function) 

1965 return function 

1966 

1967 def get_function(self): 

1968 module = self.positions.get('ob', '') 

1969 filename = self.positions.get('fl', '') 

1970 function = self.positions.get('fn', '') 

1971 return self.make_function(module, filename, function) 

1972 

1973 def get_callee(self): 

1974 module = self.positions.get('cob', '') 

1975 filename = self.positions.get('cfi', '') 

1976 function = self.positions.get('cfn', '') 

1977 return self.make_function(module, filename, function) 

1978 

1979 def readline(self): 

1980 # Override LineParser.readline to ignore comment lines 

1981 while True: 

1982 LineParser.readline(self) 

1983 if self.eof() or not self.lookahead().startswith('#'): 

1984 break 

1985 

1986 

1987class PerfParser(LineParser): 

1988 """Parser for linux perf callgraph output. 

1989 

1990 It expects output generated with 

1991 

1992 perf record -g 

1993 perf script | gprof2dot.py --format=perf 

1994 """ 

1995 

1996 def __init__(self, infile): 

1997 LineParser.__init__(self, infile) 

1998 self.profile = Profile() 

1999 

2000 def readline(self): 

2001 # Override LineParser.readline to ignore comment lines 

2002 while True: 

2003 LineParser.readline(self) 

2004 if self.eof() or not self.lookahead().startswith('#'): 

2005 break 

2006 

2007 def parse(self): 

2008 # read lookahead 

2009 self.readline() 

2010 

2011 profile = self.profile 

2012 profile[SAMPLES] = 0 

2013 while not self.eof(): 

2014 self.parse_event() 

2015 

2016 # compute derived data 

2017 profile.validate() 

2018 profile.find_cycles() 

2019 profile.ratio(TIME_RATIO, SAMPLES) 

2020 profile.call_ratios(SAMPLES2) 

2021 if totalMethod == "callratios": 

2022 # Heuristic approach. TOTAL_SAMPLES is unused. 

2023 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2024 elif totalMethod == "callstacks": 

2025 # Use the actual call chains for functions. 

2026 profile[TOTAL_SAMPLES] = profile[SAMPLES] 

2027 profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES) 

2028 # Then propagate that total time to the calls. 

2029 for function in profile.functions.values(): 

2030 for call in function.calls.values(): 

2031 if call.ratio is not None: 

2032 callee = profile.functions[call.callee_id] 

2033 call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] 

2034 else: 

2035 assert False 

2036 

2037 return profile 

2038 

2039 def parse_event(self): 

2040 if self.eof(): 

2041 return 

2042 

2043 line = self.consume() 

2044 assert line 

2045 

2046 callchain = self.parse_callchain() 

2047 if not callchain: 

2048 return 

2049 

2050 callee = callchain[0] 

2051 callee[SAMPLES] += 1 

2052 self.profile[SAMPLES] += 1 

2053 

2054 for caller in callchain[1:]: 

2055 try: 

2056 call = caller.calls[callee.id] 

2057 except KeyError: 

2058 call = Call(callee.id) 

2059 call[SAMPLES2] = 1 

2060 caller.add_call(call) 

2061 else: 

2062 call[SAMPLES2] += 1 

2063 

2064 callee = caller 

2065 

2066 # Increment TOTAL_SAMPLES only once on each function. 

2067 stack = set(callchain) 

2068 for function in stack: 

2069 function[TOTAL_SAMPLES] += 1 

2070 

2071 def parse_callchain(self): 

2072 callchain = [] 

2073 while self.lookahead(): 

2074 function = self.parse_call() 

2075 if function is None: 

2076 break 

2077 callchain.append(function) 

2078 if self.lookahead() == '': 

2079 self.consume() 

2080 return callchain 

2081 

2082 call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>.*)\)$') 

2083 addr2_re = re.compile(r'\+0x[0-9a-fA-F]+$') 

2084 

2085 def parse_call(self): 

2086 line = self.consume() 

2087 mo = self.call_re.match(line) 

2088 assert mo 

2089 if not mo: 

2090 return None 

2091 

2092 function_name = mo.group('symbol') 

2093 

2094 # If present, amputate program counter from function name. 

2095 if function_name: 

2096 function_name = re.sub(self.addr2_re, '', function_name) 

2097 

2098 if not function_name or function_name == '[unknown]': 

2099 function_name = mo.group('address') 

2100 

2101 module = mo.group('module') 

2102 

2103 function_id = function_name + ':' + module 

2104 

2105 try: 

2106 function = self.profile.functions[function_id] 

2107 except KeyError: 

2108 function = Function(function_id, function_name) 

2109 function.module = os.path.basename(module) 

2110 function[SAMPLES] = 0 

2111 function[TOTAL_SAMPLES] = 0 

2112 self.profile.add_function(function) 

2113 

2114 return function 

2115 

2116 

2117class OprofileParser(LineParser): 

2118 """Parser for oprofile callgraph output. 

2119 

2120 See also: 

2121 - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph 

2122 """ 

2123 

2124 _fields_re = { 

2125 'samples': r'(\d+)', 

2126 '%': r'(\S+)', 

2127 'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)', 

2128 'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)', 

2129 'app name': r'(?P<application>\S+)', 

2130 'symbol name': r'(?P<symbol>\(no symbols\)|.+?)', 

2131 } 

2132 

2133 def __init__(self, infile): 

2134 LineParser.__init__(self, infile) 

2135 self.entries = {} 

2136 self.entry_re = None 

2137 

2138 def add_entry(self, callers, function, callees): 

2139 try: 

2140 entry = self.entries[function.id] 

2141 except KeyError: 

2142 self.entries[function.id] = (callers, function, callees) 

2143 else: 

2144 callers_total, function_total, callees_total = entry 

2145 self.update_subentries_dict(callers_total, callers) 

2146 function_total.samples += function.samples 

2147 self.update_subentries_dict(callees_total, callees) 

2148 

2149 def update_subentries_dict(self, totals, partials): 

2150 for partial in partials.values(): 

2151 try: 

2152 total = totals[partial.id] 

2153 except KeyError: 

2154 totals[partial.id] = partial 

2155 else: 

2156 total.samples += partial.samples 

2157 

2158 def parse(self): 

2159 # read lookahead 

2160 self.readline() 

2161 

2162 self.parse_header() 

2163 while self.lookahead(): 

2164 self.parse_entry() 

2165 

2166 profile = Profile() 

2167 

2168 reverse_call_samples = {} 

2169 

2170 # populate the profile 

2171 profile[SAMPLES] = 0 

2172 for _callers, _function, _callees in self.entries.values(): 

2173 function = Function(_function.id, _function.name) 

2174 function[SAMPLES] = _function.samples 

2175 profile.add_function(function) 

2176 profile[SAMPLES] += _function.samples 

2177 

2178 if _function.application: 

2179 function.process = os.path.basename(_function.application) 

2180 if _function.image: 

2181 function.module = os.path.basename(_function.image) 

2182 

2183 total_callee_samples = 0 

2184 for _callee in _callees.values(): 

2185 total_callee_samples += _callee.samples 

2186 

2187 for _callee in _callees.values(): 

2188 if not _callee.self: 

2189 call = Call(_callee.id) 

2190 call[SAMPLES2] = _callee.samples 

2191 function.add_call(call) 

2192 

2193 # compute derived data 

2194 profile.validate() 

2195 profile.find_cycles() 

2196 profile.ratio(TIME_RATIO, SAMPLES) 

2197 profile.call_ratios(SAMPLES2) 

2198 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2199 

2200 return profile 

2201 

2202 def parse_header(self): 

2203 while not self.match_header(): 

2204 self.consume() 

2205 line = self.lookahead() 

2206 fields = re.split(r'\s\s+', line) 

2207 entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$' 

2208 self.entry_re = re.compile(entry_re) 

2209 self.skip_separator() 

2210 

2211 def parse_entry(self): 

2212 callers = self.parse_subentries() 

2213 if self.match_primary(): 

2214 function = self.parse_subentry() 

2215 if function is not None: 

2216 callees = self.parse_subentries() 

2217 self.add_entry(callers, function, callees) 

2218 self.skip_separator() 

2219 

2220 def parse_subentries(self): 

2221 subentries = {} 

2222 while self.match_secondary(): 

2223 subentry = self.parse_subentry() 

2224 subentries[subentry.id] = subentry 

2225 return subentries 

2226 

2227 def parse_subentry(self): 

2228 entry = Struct() 

2229 line = self.consume() 

2230 mo = self.entry_re.match(line) 

2231 if not mo: 

2232 raise ParseError('failed to parse', line) 

2233 fields = mo.groupdict() 

2234 entry.samples = int(mo.group(1)) 

2235 if 'source' in fields and fields['source'] != '(no location information)': 

2236 source = fields['source'] 

2237 filename, lineno = source.split(':') 

2238 entry.filename = filename 

2239 entry.lineno = int(lineno) 

2240 else: 

2241 source = '' 

2242 entry.filename = None 

2243 entry.lineno = None 

2244 entry.image = fields.get('image', '') 

2245 entry.application = fields.get('application', '') 

2246 if 'symbol' in fields and fields['symbol'] != '(no symbols)': 

2247 entry.symbol = fields['symbol'] 

2248 else: 

2249 entry.symbol = '' 

2250 if entry.symbol.startswith('"') and entry.symbol.endswith('"'): 

2251 entry.symbol = entry.symbol[1:-1] 

2252 entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) 

2253 entry.self = fields.get('self', None) != None 

2254 if entry.self: 

2255 entry.id += ':self' 

2256 if entry.symbol: 

2257 entry.name = entry.symbol 

2258 else: 

2259 entry.name = entry.image 

2260 return entry 

2261 

2262 def skip_separator(self): 

2263 while not self.match_separator(): 

2264 self.consume() 

2265 self.consume() 

2266 

2267 def match_header(self): 

2268 line = self.lookahead() 

2269 return line.startswith('samples') 

2270 

2271 def match_separator(self): 

2272 line = self.lookahead() 

2273 return line == '-'*len(line) 

2274 

2275 def match_primary(self): 

2276 line = self.lookahead() 

2277 return not line[:1].isspace() 

2278 

2279 def match_secondary(self): 

2280 line = self.lookahead() 

2281 return line[:1].isspace() 

2282 

2283 

2284class HProfParser(LineParser): 

2285 """Parser for java hprof output 

2286 

2287 See also: 

2288 - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html 

2289 """ 

2290 

2291 trace_re = re.compile(r'\t(.*)\((.*):(.*)\)') 

2292 trace_id_re = re.compile(r'^TRACE (\d+):$') 

2293 

2294 def __init__(self, infile): 

2295 LineParser.__init__(self, infile) 

2296 self.traces = {} 

2297 self.samples = {} 

2298 

2299 def parse(self): 

2300 # read lookahead 

2301 self.readline() 

2302 

2303 while not self.lookahead().startswith('------'): self.consume() 

2304 while not self.lookahead().startswith('TRACE '): self.consume() 

2305 

2306 self.parse_traces() 

2307 

2308 while not self.lookahead().startswith('CPU'): 

2309 self.consume() 

2310 

2311 self.parse_samples() 

2312 

2313 # populate the profile 

2314 profile = Profile() 

2315 profile[SAMPLES] = 0 

2316 

2317 functions = {} 

2318 

2319 # build up callgraph 

2320 for id, trace in self.traces.items(): 

2321 if not id in self.samples: continue 

2322 mtime = self.samples[id][0] 

2323 last = None 

2324 

2325 for func, file, line in trace: 

2326 if not func in functions: 

2327 function = Function(func, func) 

2328 function[SAMPLES] = 0 

2329 profile.add_function(function) 

2330 functions[func] = function 

2331 

2332 function = functions[func] 

2333 # allocate time to the deepest method in the trace 

2334 if not last: 

2335 function[SAMPLES] += mtime 

2336 profile[SAMPLES] += mtime 

2337 else: 

2338 c = function.get_call(last) 

2339 c[SAMPLES2] += mtime 

2340 

2341 last = func 

2342 

2343 # compute derived data 

2344 profile.validate() 

2345 profile.find_cycles() 

2346 profile.ratio(TIME_RATIO, SAMPLES) 

2347 profile.call_ratios(SAMPLES2) 

2348 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2349 

2350 return profile 

2351 

2352 def parse_traces(self): 

2353 while self.lookahead().startswith('TRACE '): 

2354 self.parse_trace() 

2355 

2356 def parse_trace(self): 

2357 l = self.consume() 

2358 mo = self.trace_id_re.match(l) 

2359 tid = mo.group(1) 

2360 last = None 

2361 trace = [] 

2362 

2363 while self.lookahead().startswith('\t'): 

2364 l = self.consume() 

2365 match = self.trace_re.search(l) 

2366 if not match: 

2367 #sys.stderr.write('Invalid line: %s\n' % l) 

2368 break 

2369 else: 

2370 function_name, file, line = match.groups() 

2371 trace += [(function_name, file, line)] 

2372 

2373 self.traces[int(tid)] = trace 

2374 

2375 def parse_samples(self): 

2376 self.consume() 

2377 self.consume() 

2378 

2379 while not self.lookahead().startswith('CPU'): 

2380 rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split() 

2381 self.samples[int(traceid)] = (int(count), method) 

2382 self.consume() 

2383 

2384 

2385class SysprofParser(XmlParser): 

2386 

2387 def __init__(self, stream): 

2388 XmlParser.__init__(self, stream) 

2389 

2390 def parse(self): 

2391 objects = {} 

2392 nodes = {} 

2393 

2394 self.element_start('profile') 

2395 while self.token.type == XML_ELEMENT_START: 

2396 if self.token.name_or_data == 'objects': 

2397 assert not objects 

2398 objects = self.parse_items('objects') 

2399 elif self.token.name_or_data == 'nodes': 

2400 assert not nodes 

2401 nodes = self.parse_items('nodes') 

2402 else: 

2403 self.parse_value(self.token.name_or_data) 

2404 self.element_end('profile') 

2405 

2406 return self.build_profile(objects, nodes) 

2407 

2408 def parse_items(self, name): 

2409 assert name[-1] == 's' 

2410 items = {} 

2411 self.element_start(name) 

2412 while self.token.type == XML_ELEMENT_START: 

2413 id, values = self.parse_item(name[:-1]) 

2414 assert id not in items 

2415 items[id] = values 

2416 self.element_end(name) 

2417 return items 

2418 

2419 def parse_item(self, name): 

2420 attrs = self.element_start(name) 

2421 id = int(attrs['id']) 

2422 values = self.parse_values() 

2423 self.element_end(name) 

2424 return id, values 

2425 

2426 def parse_values(self): 

2427 values = {} 

2428 while self.token.type == XML_ELEMENT_START: 

2429 name = self.token.name_or_data 

2430 value = self.parse_value(name) 

2431 assert name not in values 

2432 values[name] = value 

2433 return values 

2434 

2435 def parse_value(self, tag): 

2436 self.element_start(tag) 

2437 value = self.character_data() 

2438 self.element_end(tag) 

2439 if value.isdigit(): 

2440 return int(value) 

2441 if value.startswith('"') and value.endswith('"'): 

2442 return value[1:-1] 

2443 return value 

2444 

2445 def build_profile(self, objects, nodes): 

2446 profile = Profile() 

2447 

2448 profile[SAMPLES] = 0 

2449 for id, object in objects.items(): 

2450 # Ignore fake objects (process names, modules, "Everything", "kernel", etc.) 

2451 if object['self'] == 0: 

2452 continue 

2453 

2454 function = Function(id, object['name']) 

2455 function[SAMPLES] = object['self'] 

2456 profile.add_function(function) 

2457 profile[SAMPLES] += function[SAMPLES] 

2458 

2459 for id, node in nodes.items(): 

2460 # Ignore fake calls 

2461 if node['self'] == 0: 

2462 continue 

2463 

2464 # Find a non-ignored parent 

2465 parent_id = node['parent'] 

2466 while parent_id != 0: 

2467 parent = nodes[parent_id] 

2468 caller_id = parent['object'] 

2469 if objects[caller_id]['self'] != 0: 

2470 break 

2471 parent_id = parent['parent'] 

2472 if parent_id == 0: 

2473 continue 

2474 

2475 callee_id = node['object'] 

2476 

2477 assert objects[caller_id]['self'] 

2478 assert objects[callee_id]['self'] 

2479 

2480 function = profile.functions[caller_id] 

2481 

2482 samples = node['self'] 

2483 try: 

2484 call = function.calls[callee_id] 

2485 except KeyError: 

2486 call = Call(callee_id) 

2487 call[SAMPLES2] = samples 

2488 function.add_call(call) 

2489 else: 

2490 call[SAMPLES2] += samples 

2491 

2492 # Compute derived events 

2493 profile.validate() 

2494 profile.find_cycles() 

2495 profile.ratio(TIME_RATIO, SAMPLES) 

2496 profile.call_ratios(SAMPLES2) 

2497 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2498 

2499 return profile 

2500 

2501 

2502class XPerfParser(Parser): 

2503 """Parser for CSVs generated by XPerf, from Microsoft Windows Performance Tools. 

2504 """ 

2505 

2506 def __init__(self, stream): 

2507 Parser.__init__(self) 

2508 self.stream = stream 

2509 self.profile = Profile() 

2510 self.profile[SAMPLES] = 0 

2511 self.column = {} 

2512 

2513 def parse(self): 

2514 import csv 

2515 reader = csv.reader( 

2516 self.stream, 

2517 delimiter = ',', 

2518 quotechar = None, 

2519 escapechar = None, 

2520 doublequote = False, 

2521 skipinitialspace = True, 

2522 lineterminator = '\r\n', 

2523 quoting = csv.QUOTE_NONE) 

2524 header = True 

2525 for row in reader: 

2526 if header: 

2527 self.parse_header(row) 

2528 header = False 

2529 else: 

2530 self.parse_row(row) 

2531 

2532 # compute derived data 

2533 self.profile.validate() 

2534 self.profile.find_cycles() 

2535 self.profile.ratio(TIME_RATIO, SAMPLES) 

2536 self.profile.call_ratios(SAMPLES2) 

2537 self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2538 

2539 return self.profile 

2540 

2541 def parse_header(self, row): 

2542 for column in range(len(row)): 

2543 name = row[column] 

2544 assert name not in self.column 

2545 self.column[name] = column 

2546 

2547 def parse_row(self, row): 

2548 fields = {} 

2549 for name, column in self.column.items(): 

2550 value = row[column] 

2551 for factory in int, float: 

2552 try: 

2553 value = factory(value) 

2554 except ValueError: 

2555 pass 

2556 else: 

2557 break 

2558 fields[name] = value 

2559 

2560 process = fields['Process Name'] 

2561 symbol = fields['Module'] + '!' + fields['Function'] 

2562 weight = fields['Weight'] 

2563 count = fields['Count'] 

2564 

2565 if process == 'Idle': 

2566 return 

2567 

2568 function = self.get_function(process, symbol) 

2569 function[SAMPLES] += weight * count 

2570 self.profile[SAMPLES] += weight * count 

2571 

2572 stack = fields['Stack'] 

2573 if stack != '?': 

2574 stack = stack.split('/') 

2575 assert stack[0] == '[Root]' 

2576 if stack[-1] != symbol: 

2577 # XXX: some cases the sampled function does not appear in the stack 

2578 stack.append(symbol) 

2579 caller = None 

2580 for symbol in stack[1:]: 

2581 callee = self.get_function(process, symbol) 

2582 if caller is not None: 

2583 try: 

2584 call = caller.calls[callee.id] 

2585 except KeyError: 

2586 call = Call(callee.id) 

2587 call[SAMPLES2] = count 

2588 caller.add_call(call) 

2589 else: 

2590 call[SAMPLES2] += count 

2591 caller = callee 

2592 

2593 def get_function(self, process, symbol): 

2594 function_id = process + '!' + symbol 

2595 

2596 try: 

2597 function = self.profile.functions[function_id] 

2598 except KeyError: 

2599 module, name = symbol.split('!', 1) 

2600 function = Function(function_id, name) 

2601 function.process = process 

2602 function.module = module 

2603 function[SAMPLES] = 0 

2604 self.profile.add_function(function) 

2605 

2606 return function 

2607 

2608 

2609class SleepyParser(Parser): 

2610 """Parser for GNU gprof output. 

2611 

2612 See also: 

2613 - http://www.codersnotes.com/sleepy/ 

2614 - http://sleepygraph.sourceforge.net/ 

2615 """ 

2616 

2617 stdinInput = False 

2618 

2619 def __init__(self, filename): 

2620 Parser.__init__(self) 

2621 

2622 from zipfile import ZipFile 

2623 

2624 self.database = ZipFile(filename) 

2625 

2626 self.symbols = {} 

2627 self.calls = {} 

2628 

2629 self.profile = Profile() 

2630 

2631 _symbol_re = re.compile( 

2632 r'^(?P<id>\w+)' + 

2633 r'\s+"(?P<module>[^"]*)"' + 

2634 r'\s+"(?P<procname>[^"]*)"' + 

2635 r'\s+"(?P<sourcefile>[^"]*)"' + 

2636 r'\s+(?P<sourceline>\d+)$' 

2637 ) 

2638 

2639 def openEntry(self, name): 

2640 # Some versions of verysleepy use lowercase filenames 

2641 for database_name in self.database.namelist(): 

2642 if name.lower() == database_name.lower(): 

2643 name = database_name 

2644 break 

2645 

2646 return self.database.open(name, 'r') 

2647 

2648 def parse_symbols(self): 

2649 for line in self.openEntry('Symbols.txt'): 

2650 line = line.decode('UTF-8').rstrip('\r\n') 

2651 

2652 mo = self._symbol_re.match(line) 

2653 if mo: 

2654 symbol_id, module, procname, sourcefile, sourceline = mo.groups() 

2655 

2656 function_id = ':'.join([module, procname]) 

2657 

2658 try: 

2659 function = self.profile.functions[function_id] 

2660 except KeyError: 

2661 function = Function(function_id, procname) 

2662 function.module = module 

2663 function[SAMPLES] = 0 

2664 self.profile.add_function(function) 

2665 

2666 self.symbols[symbol_id] = function 

2667 

2668 def parse_callstacks(self): 

2669 for line in self.openEntry('Callstacks.txt'): 

2670 line = line.decode('UTF-8').rstrip('\r\n') 

2671 

2672 fields = line.split() 

2673 samples = float(fields[0]) 

2674 callstack = fields[1:] 

2675 

2676 callstack = [self.symbols[symbol_id] for symbol_id in callstack] 

2677 

2678 callee = callstack[0] 

2679 

2680 callee[SAMPLES] += samples 

2681 self.profile[SAMPLES] += samples 

2682 

2683 for caller in callstack[1:]: 

2684 try: 

2685 call = caller.calls[callee.id] 

2686 except KeyError: 

2687 call = Call(callee.id) 

2688 call[SAMPLES2] = samples 

2689 caller.add_call(call) 

2690 else: 

2691 call[SAMPLES2] += samples 

2692 

2693 callee = caller 

2694 

2695 def parse(self): 

2696 profile = self.profile 

2697 profile[SAMPLES] = 0 

2698 

2699 self.parse_symbols() 

2700 self.parse_callstacks() 

2701 

2702 # Compute derived events 

2703 profile.validate() 

2704 profile.find_cycles() 

2705 profile.ratio(TIME_RATIO, SAMPLES) 

2706 profile.call_ratios(SAMPLES2) 

2707 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2708 

2709 return profile 

2710 

2711 

2712class PstatsParser: 

2713 """Parser python profiling statistics saved with te pstats module.""" 

2714 

2715 stdinInput = False 

2716 multipleInput = True 

2717 

2718 def __init__(self, *filename): 

2719 import pstats 

2720 try: 

2721 self.stats = pstats.Stats(*filename) 

2722 except ValueError: 

2723 sys.stderr.write('error: failed to load %s, maybe they are generated by different python version?\n' % ', '.join(filename)) 

2724 sys.exit(1) 

2725 self.profile = Profile() 

2726 self.function_ids = {} 

2727 

2728 def get_function_name(self, key): 

2729 filename, line, name = key 

2730 module = os.path.splitext(filename)[0] 

2731 module = os.path.basename(module) 

2732 return "%s:%d:%s" % (module, line, name) 

2733 

2734 def get_function(self, key): 

2735 try: 

2736 id = self.function_ids[key] 

2737 except KeyError: 

2738 id = len(self.function_ids) 

2739 name = self.get_function_name(key) 

2740 function = Function(id, name) 

2741 function.filename = key[0] 

2742 self.profile.functions[id] = function 

2743 self.function_ids[key] = id 

2744 else: 

2745 function = self.profile.functions[id] 

2746 return function 

2747 

2748 def parse(self): 

2749 self.profile[TIME] = 0.0 

2750 self.profile[TOTAL_TIME] = self.stats.total_tt 

2751 for fn, (cc, nc, tt, ct, callers) in self.stats.stats.items(): 

2752 callee = self.get_function(fn) 

2753 callee.called = nc 

2754 callee[TOTAL_TIME] = ct 

2755 callee[TIME] = tt 

2756 self.profile[TIME] += tt 

2757 self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) 

2758 for fn, value in callers.items(): 

2759 caller = self.get_function(fn) 

2760 call = Call(callee.id) 

2761 if isinstance(value, tuple): 

2762 for i in range(0, len(value), 4): 

2763 nc, cc, tt, ct = value[i:i+4] 

2764 if CALLS in call: 

2765 call[CALLS] += cc 

2766 else: 

2767 call[CALLS] = cc 

2768 

2769 if TOTAL_TIME in call: 

2770 call[TOTAL_TIME] += ct 

2771 else: 

2772 call[TOTAL_TIME] = ct 

2773 

2774 else: 

2775 call[CALLS] = value 

2776 call[TOTAL_TIME] = ratio(value, nc)*ct 

2777 

2778 caller.add_call(call) 

2779 

2780 if False: 

2781 self.stats.print_stats() 

2782 self.stats.print_callees() 

2783 

2784 # Compute derived events 

2785 self.profile.validate() 

2786 self.profile.ratio(TIME_RATIO, TIME) 

2787 self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) 

2788 

2789 return self.profile 

2790 

2791class DtraceParser(LineParser): 

2792 """Parser for linux perf callgraph output. 

2793 

2794 It expects output generated with 

2795 

2796 # Refer to https://github.com/brendangregg/FlameGraph#dtrace 

2797 # 60 seconds of user-level stacks, including time spent in-kernel, for PID 12345 at 97 Hertz 

2798 sudo dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks 

2799 

2800 # The dtrace output 

2801 gprof2dot.py -f dtrace out.user_stacks 

2802 

2803 # Notice: sometimes, the dtrace outputs format may be latin-1, and gprof2dot will fail to parse it. 

2804 # To solve this problem, you should use iconv to convert to UTF-8 explicitly. 

2805 # TODO: add an encoding flag to tell gprof2dot how to decode the profile file. 

2806 iconv -f ISO-8859-1 -t UTF-8 out.user_stacks | gprof2dot.py -f dtrace 

2807 """ 

2808 

2809 def __init__(self, infile): 

2810 LineParser.__init__(self, infile) 

2811 self.profile = Profile() 

2812 

2813 def readline(self): 

2814 # Override LineParser.readline to ignore comment lines 

2815 while True: 

2816 LineParser.readline(self) 

2817 if self.eof(): 

2818 break 

2819 

2820 line = self.lookahead().strip() 

2821 if line.startswith('CPU'): 

2822 # The format likes: 

2823 # CPU ID FUNCTION:NAME 

2824 # 1 29684 :tick-60s 

2825 # Skip next line 

2826 LineParser.readline(self) 

2827 elif not line == '': 

2828 break 

2829 

2830 

2831 def parse(self): 

2832 # read lookahead 

2833 self.readline() 

2834 

2835 profile = self.profile 

2836 profile[SAMPLES] = 0 

2837 while not self.eof(): 

2838 self.parse_event() 

2839 

2840 # compute derived data 

2841 profile.validate() 

2842 profile.find_cycles() 

2843 profile.ratio(TIME_RATIO, SAMPLES) 

2844 profile.call_ratios(SAMPLES2) 

2845 if totalMethod == "callratios": 

2846 # Heuristic approach. TOTAL_SAMPLES is unused. 

2847 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2848 elif totalMethod == "callstacks": 

2849 # Use the actual call chains for functions. 

2850 profile[TOTAL_SAMPLES] = profile[SAMPLES] 

2851 profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES) 

2852 # Then propagate that total time to the calls. 

2853 for function in profile.functions.values(): 

2854 for call in function.calls.values(): 

2855 if call.ratio is not None: 

2856 callee = profile.functions[call.callee_id] 

2857 call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] 

2858 else: 

2859 assert False 

2860 

2861 return profile 

2862 

2863 def parse_event(self): 

2864 if self.eof(): 

2865 return 

2866 

2867 callchain, count = self.parse_callchain() 

2868 if not callchain: 

2869 return 

2870 

2871 callee = callchain[0] 

2872 callee[SAMPLES] += count 

2873 self.profile[SAMPLES] += count 

2874 

2875 for caller in callchain[1:]: 

2876 try: 

2877 call = caller.calls[callee.id] 

2878 except KeyError: 

2879 call = Call(callee.id) 

2880 call[SAMPLES2] = count 

2881 caller.add_call(call) 

2882 else: 

2883 call[SAMPLES2] += count 

2884 

2885 callee = caller 

2886 

2887 # Increment TOTAL_SAMPLES only once on each function. 

2888 stack = set(callchain) 

2889 for function in stack: 

2890 function[TOTAL_SAMPLES] += count 

2891 

2892 

2893 def parse_callchain(self): 

2894 callchain = [] 

2895 count = 0 

2896 while self.lookahead(): 

2897 function, count = self.parse_call() 

2898 if function is None: 

2899 break 

2900 callchain.append(function) 

2901 return callchain, count 

2902 

2903 call_re = re.compile(r'^\s+(?P<module>.*)`(?P<symbol>.*)') 

2904 addr2_re = re.compile(r'\+0x[0-9a-fA-F]+$') 

2905 

2906 def parse_call(self): 

2907 line = self.consume() 

2908 mo = self.call_re.match(line) 

2909 if not mo: 

2910 # The line must be the stack count 

2911 return None, int(line.strip()) 

2912 

2913 function_name = mo.group('symbol') 

2914 

2915 # If present, amputate program counter from function name. 

2916 if function_name: 

2917 function_name = re.sub(self.addr2_re, '', function_name) 

2918 

2919 # if not function_name or function_name == '[unknown]': 

2920 # function_name = mo.group('address') 

2921 

2922 module = mo.group('module') 

2923 

2924 function_id = function_name + ':' + module 

2925 

2926 try: 

2927 function = self.profile.functions[function_id] 

2928 except KeyError: 

2929 function = Function(function_id, function_name) 

2930 function.module = os.path.basename(module) 

2931 function[SAMPLES] = 0 

2932 function[TOTAL_SAMPLES] = 0 

2933 self.profile.add_function(function) 

2934 

2935 return function, None 

2936 

2937 

2938class CollapseParser(LineParser): 

2939 """Parser for the output of stackcollapse 

2940 

2941 (from https://github.com/brendangregg/FlameGraph) 

2942 """ 

2943 

2944 def __init__(self, infile): 

2945 LineParser.__init__(self, infile) 

2946 self.profile = Profile() 

2947 

2948 def parse(self): 

2949 profile = self.profile 

2950 profile[SAMPLES] = 0 

2951 

2952 self.readline() 

2953 while not self.eof(): 

2954 self.parse_event() 

2955 

2956 # compute derived data 

2957 profile.validate() 

2958 profile.find_cycles() 

2959 profile.ratio(TIME_RATIO, SAMPLES) 

2960 profile.call_ratios(SAMPLES2) 

2961 if totalMethod == "callratios": 

2962 # Heuristic approach. TOTAL_SAMPLES is unused. 

2963 profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) 

2964 elif totalMethod == "callstacks": 

2965 # Use the actual call chains for functions. 

2966 profile[TOTAL_SAMPLES] = profile[SAMPLES] 

2967 profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES) 

2968 # Then propagate that total time to the calls. 

2969 for function in compat_itervalues(profile.functions): 

2970 for call in compat_itervalues(function.calls): 

2971 if call.ratio is not None: 

2972 callee = profile.functions[call.callee_id] 

2973 call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] 

2974 else: 

2975 assert False 

2976 

2977 return profile 

2978 

2979 def parse_event(self): 

2980 line = self.consume() 

2981 

2982 stack, count = line.rsplit(' ',maxsplit=1) 

2983 count=int(count) 

2984 self.profile[SAMPLES] += count 

2985 

2986 calls = stack.split(';') 

2987 functions = [self._make_function(call) for call in calls] 

2988 

2989 functions[-1][SAMPLES] += count 

2990 

2991 # TOTAL_SAMPLES excludes loops 

2992 for func in set(functions): 

2993 func[TOTAL_SAMPLES] += count 

2994 

2995 # add call data 

2996 callee = functions[-1] 

2997 for caller in reversed(functions[:-1]): 

2998 call = caller.calls.get(callee.id) 

2999 if call is None: 

3000 call = Call(callee.id) 

3001 call[SAMPLES2] = 0 

3002 caller.add_call(call) 

3003 call[SAMPLES2] += count 

3004 callee = caller 

3005 

3006 call_re = re.compile(r'^(?P<func>[^ ]+) \((?P<file>.*):(?P<line>[0-9]+)\)$') 

3007 

3008 def _make_function(self, call): 

3009 """turn a call str into a Function 

3010 

3011 takes a call site, as found between semicolons in the input, and returns 

3012 a Function definition corresponding to that call site. 

3013 """ 

3014 mo = self.call_re.match(call) 

3015 if mo: 

3016 name, module, line = mo.groups() 

3017 func_id = "%s:%s" % (module, name) 

3018 else: 

3019 name = func_id = call 

3020 module = None 

3021 

3022 func = self.profile.functions.get(func_id) 

3023 if func is None: 

3024 func = Function(func_id, name) 

3025 func.module = module 

3026 func[SAMPLES] = 0 

3027 func[TOTAL_SAMPLES] = 0 

3028 self.profile.add_function(func) 

3029 return func 

3030 

3031 

3032formats = { 

3033 "axe": AXEParser, 

3034 "callgrind": CallgrindParser, 

3035 "collapse": CollapseParser, 

3036 "hprof": HProfParser, 

3037 "json": JsonParser, 

3038 "oprofile": OprofileParser, 

3039 "perf": PerfParser, 

3040 "prof": GprofParser, 

3041 "pstats": PstatsParser, 

3042 "sleepy": SleepyParser, 

3043 "sysprof": SysprofParser, 

3044 "xperf": XPerfParser, 

3045 "dtrace": DtraceParser, 

3046} 

3047 

3048 

3049######################################################################## 

3050# Output 

3051 

3052 

3053class Theme: 

3054 

3055 def __init__(self, 

3056 bgcolor = (0.0, 0.0, 1.0), 

3057 mincolor = (0.0, 0.0, 0.0), 

3058 maxcolor = (0.0, 0.0, 1.0), 

3059 fontname = "Arial", 

3060 fontcolor = "white", 

3061 nodestyle = "filled", 

3062 minfontsize = 10.0, 

3063 maxfontsize = 10.0, 

3064 minpenwidth = 0.5, 

3065 maxpenwidth = 4.0, 

3066 gamma = 2.2, 

3067 skew = 1.0): 

3068 self.bgcolor = bgcolor 

3069 self.mincolor = mincolor 

3070 self.maxcolor = maxcolor 

3071 self.fontname = fontname 

3072 self.fontcolor = fontcolor 

3073 self.nodestyle = nodestyle 

3074 self.minfontsize = minfontsize 

3075 self.maxfontsize = maxfontsize 

3076 self.minpenwidth = minpenwidth 

3077 self.maxpenwidth = maxpenwidth 

3078 self.gamma = gamma 

3079 self.skew = skew 

3080 

3081 def graph_bgcolor(self): 

3082 return self.hsl_to_rgb(*self.bgcolor) 

3083 

3084 def graph_fontname(self): 

3085 return self.fontname 

3086 

3087 def graph_fontcolor(self): 

3088 return self.fontcolor 

3089 

3090 def node_bgcolor(self, weight): 

3091 return self.color(weight) 

3092 

3093 def node_fgcolor(self, weight): 

3094 if self.nodestyle == "filled": 

3095 return self.graph_bgcolor() 

3096 else: 

3097 return self.color(weight) 

3098 

3099 def node_fontsize(self, weight): 

3100 return self.fontsize(weight) 

3101 

3102 def node_style(self): 

3103 return self.nodestyle 

3104 

3105 def edge_color(self, weight): 

3106 return self.color(weight) 

3107 

3108 def edge_fontsize(self, weight): 

3109 return self.fontsize(weight) 

3110 

3111 def edge_penwidth(self, weight): 

3112 return max(weight*self.maxpenwidth, self.minpenwidth) 

3113 

3114 def edge_arrowsize(self, weight): 

3115 return 0.5 * math.sqrt(self.edge_penwidth(weight)) 

3116 

3117 def fontsize(self, weight): 

3118 return max(weight**2 * self.maxfontsize, self.minfontsize) 

3119 

3120 def color(self, weight): 

3121 weight = min(max(weight, 0.0), 1.0) 

3122 

3123 hmin, smin, lmin = self.mincolor 

3124 hmax, smax, lmax = self.maxcolor 

3125 

3126 if self.skew < 0: 

3127 raise ValueError("Skew must be greater than 0") 

3128 elif self.skew == 1.0: 

3129 h = hmin + weight*(hmax - hmin) 

3130 s = smin + weight*(smax - smin) 

3131 l = lmin + weight*(lmax - lmin) 

3132 else: 

3133 base = self.skew 

3134 h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0)) 

3135 s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0)) 

3136 l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0)) 

3137 

3138 return self.hsl_to_rgb(h, s, l) 

3139 

3140 def hsl_to_rgb(self, h, s, l): 

3141 """Convert a color from HSL color-model to RGB. 

3142 

3143 See also: 

3144 - http://www.w3.org/TR/css3-color/#hsl-color 

3145 """ 

3146 

3147 h = h % 1.0 

3148 s = min(max(s, 0.0), 1.0) 

3149 l = min(max(l, 0.0), 1.0) 

3150 

3151 if l <= 0.5: 

3152 m2 = l*(s + 1.0) 

3153 else: 

3154 m2 = l + s - l*s 

3155 m1 = l*2.0 - m2 

3156 r = self._hue_to_rgb(m1, m2, h + 1.0/3.0) 

3157 g = self._hue_to_rgb(m1, m2, h) 

3158 b = self._hue_to_rgb(m1, m2, h - 1.0/3.0) 

3159 

3160 # Apply gamma correction 

3161 r **= self.gamma 

3162 g **= self.gamma 

3163 b **= self.gamma 

3164 

3165 return (r, g, b) 

3166 

3167 def _hue_to_rgb(self, m1, m2, h): 

3168 if h < 0.0: 

3169 h += 1.0 

3170 elif h > 1.0: 

3171 h -= 1.0 

3172 if h*6 < 1.0: 

3173 return m1 + (m2 - m1)*h*6.0 

3174 elif h*2 < 1.0: 

3175 return m2 

3176 elif h*3 < 2.0: 

3177 return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0 

3178 else: 

3179 return m1 

3180 

3181 

3182TEMPERATURE_COLORMAP = Theme( 

3183 mincolor = (2.0/3.0, 0.80, 0.25), # dark blue 

3184 maxcolor = (0.0, 1.0, 0.5), # satured red 

3185 gamma = 1.0 

3186) 

3187 

3188PINK_COLORMAP = Theme( 

3189 mincolor = (0.0, 1.0, 0.90), # pink 

3190 maxcolor = (0.0, 1.0, 0.5), # satured red 

3191) 

3192 

3193GRAY_COLORMAP = Theme( 

3194 mincolor = (0.0, 0.0, 0.85), # light gray 

3195 maxcolor = (0.0, 0.0, 0.0), # black 

3196) 

3197 

3198BW_COLORMAP = Theme( 

3199 minfontsize = 8.0, 

3200 maxfontsize = 24.0, 

3201 mincolor = (0.0, 0.0, 0.0), # black 

3202 maxcolor = (0.0, 0.0, 0.0), # black 

3203 minpenwidth = 0.1, 

3204 maxpenwidth = 8.0, 

3205) 

3206 

3207PRINT_COLORMAP = Theme( 

3208 minfontsize = 18.0, 

3209 maxfontsize = 30.0, 

3210 fontcolor = "black", 

3211 nodestyle = "solid", 

3212 mincolor = (0.0, 0.0, 0.0), # black 

3213 maxcolor = (0.0, 0.0, 0.0), # black 

3214 minpenwidth = 0.1, 

3215 maxpenwidth = 8.0, 

3216) 

3217 

3218 

3219themes = { 

3220 "color": TEMPERATURE_COLORMAP, 

3221 "pink": PINK_COLORMAP, 

3222 "gray": GRAY_COLORMAP, 

3223 "bw": BW_COLORMAP, 

3224 "print": PRINT_COLORMAP, 

3225} 

3226 

3227 

3228def sorted_iteritems(d): 

3229 # Used mostly for result reproducibility (while testing.) 

3230 keys = list(d.keys()) 

3231 keys.sort() 

3232 for key in keys: 

3233 value = d[key] 

3234 yield key, value 

3235 

3236 

3237class DotWriter: 

3238 """Writer for the DOT language. 

3239 

3240 See also: 

3241 - "The DOT Language" specification 

3242 http://www.graphviz.org/doc/info/lang.html 

3243 """ 

3244 

3245 strip = False 

3246 wrap = False 

3247 

3248 def __init__(self, fp): 

3249 self.fp = fp 

3250 

3251 def wrap_function_name(self, name): 

3252 """Split the function name on multiple lines.""" 

3253 

3254 if len(name) > 32: 

3255 ratio = 2.0/3.0 

3256 height = max(int(len(name)/(1.0 - ratio) + 0.5), 1) 

3257 width = max(len(name)/height, 32) 

3258 # TODO: break lines in symbols 

3259 name = textwrap.fill(name, width, break_long_words=False) 

3260 

3261 # Take away spaces 

3262 name = name.replace(", ", ",") 

3263 name = name.replace("> >", ">>") 

3264 name = name.replace("> >", ">>") # catch consecutive 

3265 

3266 return name 

3267 

3268 show_function_events = [TOTAL_TIME_RATIO, TIME_RATIO] 

3269 show_edge_events = [TOTAL_TIME_RATIO, CALLS] 

3270 

3271 def graphs_compare(self, profile1, profile2, theme, options): 

3272 self.begin_graph() 

3273 

3274 fontname = theme.graph_fontname() 

3275 fontcolor = theme.graph_fontcolor() 

3276 nodestyle = theme.node_style() 

3277 

3278 tolerance, only_slower, only_faster, color_by_difference = ( 

3279 options.tolerance, options.only_slower, options.only_faster, options.color_by_difference) 

3280 self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125) 

3281 self.attr('node', fontname=fontname, style=nodestyle, fontcolor=fontcolor, width=0, height=0) 

3282 self.attr('edge', fontname=fontname) 

3283 

3284 functions2 = {function.name: function for _, function in sorted_iteritems(profile2.functions)} 

3285 

3286 for _, function1 in sorted_iteritems(profile1.functions): 

3287 labels = [] 

3288 

3289 name = function1.name 

3290 try: 

3291 function2 = functions2[name] 

3292 if self.wrap: 

3293 name = self.wrap_function_name(name) 

3294 if color_by_difference: 

3295 min_diff, max_diff = min_max_difference(profile1, profile2) 

3296 labels.append(name) 

3297 weight_difference = 0 

3298 shape = 'box' 

3299 orientation = '0' 

3300 for event in self.show_function_events: 

3301 if event in function1.events: 

3302 event1 = function1[event] 

3303 event2 = function2[event] 

3304 

3305 difference = abs(event1 - event2) * 100 

3306 

3307 if event == TOTAL_TIME_RATIO: 

3308 weight_difference = difference 

3309 if difference >= tolerance: 

3310 if event2 > event1 and not only_faster: 

3311 shape = 'cds' 

3312 label = (f'{event.format(event1)} +' 

3313 f' {round_difference(difference, tolerance)}%') 

3314 elif event2 < event1 and not only_slower: 

3315 orientation = "90" 

3316 shape = 'cds' 

3317 label = (f'{event.format(event1)} - ' 

3318 f'{round_difference(difference, tolerance)}%') 

3319 else: 

3320 # protection to not color by difference if we choose to show only_faster/only_slower 

3321 weight_difference = 0 

3322 label = event.format(function1[event]) 

3323 else: 

3324 weight_difference = 0 

3325 label = event.format(function1[event]) 

3326 else: 

3327 if difference >= tolerance: 

3328 if event2 > event1: 

3329 label = (f'{event.format(event1)} +' 

3330 f' {round_difference(difference, tolerance)}%') 

3331 elif event2 < event1: 

3332 label = (f'{event.format(event1)} - ' 

3333 f'{round_difference(difference, tolerance)}%') 

3334 else: 

3335 label = event.format(function1[event]) 

3336 

3337 labels.append(label) 

3338 if function1.called is not None: 

3339 labels.append(f"{function1.called} {MULTIPLICATION_SIGN}/ {function2.called} {MULTIPLICATION_SIGN}") 

3340 

3341 except KeyError: 

3342 shape = 'box' 

3343 orientation = '0' 

3344 weight_difference = 0 

3345 if function1.process is not None: 

3346 labels.append(function1.process) 

3347 if function1.module is not None: 

3348 labels.append(function1.module) 

3349 

3350 if self.strip: 

3351 function_name = function1.stripped_name() 

3352 else: 

3353 function_name = function1.name 

3354 if color_by_difference: 

3355 min_diff, max_diff = 0, 0 

3356 

3357 # dot can't parse quoted strings longer than YY_BUF_SIZE, which 

3358 # defaults to 16K. But some annotated C++ functions (e.g., boost, 

3359 # https://github.com/jrfonseca/gprof2dot/issues/30) can exceed that 

3360 MAX_FUNCTION_NAME = 4096 

3361 if len(function_name) >= MAX_FUNCTION_NAME: 

3362 sys.stderr.write('warning: truncating function name with %u chars (%s)\n' % (len(function_name), function_name[:32] + '...')) 

3363 function_name = function_name[:MAX_FUNCTION_NAME - 1] + chr(0x2026) 

3364 

3365 if self.wrap: 

3366 function_name = self.wrap_function_name(function_name) 

3367 labels.append(function_name) 

3368 

3369 for event in self.show_function_events: 

3370 if event in function1.events: 

3371 label = event.format(function1[event]) 

3372 labels.append(label) 

3373 if function1.called is not None: 

3374 labels.append("%u%s" % (function1.called, MULTIPLICATION_SIGN)) 

3375 

3376 if color_by_difference and weight_difference: 

3377 # min and max is calculated whe color_by_difference is true 

3378 weight = rescale_difference(weight_difference, min_diff, max_diff) 

3379 

3380 elif function1.weight is not None and not color_by_difference: 

3381 weight = function1.weight 

3382 else: 

3383 weight = 0.0 

3384 

3385 label = '\n'.join(labels) 

3386 

3387 self.node(function1.id, 

3388 label=label, 

3389 orientation=orientation, 

3390 color=self.color(theme.node_bgcolor(weight)), 

3391 shape=shape, 

3392 fontcolor=self.color(theme.node_fgcolor(weight)), 

3393 fontsize="%f" % theme.node_fontsize(weight), 

3394 tooltip=function1.filename, 

3395 ) 

3396 

3397 calls2 = {call.callee_id: call for _, call in sorted_iteritems(function2.calls)} 

3398 functions_by_id1 = {function.id: function for _, function in sorted_iteritems(profile1.functions)} 

3399 

3400 for _, call1 in sorted_iteritems(function1.calls): 

3401 labels = [] 

3402 try: 

3403 # if profiles do not have identical setups, callee_id will not be identical either 

3404 call_id1 = call1.callee_id 

3405 call_name = functions_by_id1[call_id1].name 

3406 call_id2 = functions2[call_name].id 

3407 call2 = calls2[call_id2] 

3408 for event in self.show_edge_events: 

3409 if event in call1.events: 

3410 label = f'{event.format(call1[event])} / {event.format(call2[event])}' 

3411 labels.append(label) 

3412 except KeyError: 

3413 for event in self.show_edge_events: 

3414 if event in call1.events: 

3415 label = f'{event.format(call1[event])}' 

3416 labels.append(label) 

3417 

3418 weight = 0 if color_by_difference else call1.weight 

3419 label = '\n'.join(labels) 

3420 self.edge(function1.id, call1.callee_id, 

3421 label=label, 

3422 color=self.color(theme.edge_color(weight)), 

3423 fontcolor=self.color(theme.edge_color(weight)), 

3424 fontsize="%.2f" % theme.edge_fontsize(weight), 

3425 penwidth="%.2f" % theme.edge_penwidth(weight), 

3426 labeldistance="%.2f" % theme.edge_penwidth(weight), 

3427 arrowsize="%.2f" % theme.edge_arrowsize(weight), 

3428 ) 

3429 self.end_graph() 

3430 

3431 def graph(self, profile, theme): 

3432 self.begin_graph() 

3433 

3434 fontname = theme.graph_fontname() 

3435 fontcolor = theme.graph_fontcolor() 

3436 nodestyle = theme.node_style() 

3437 

3438 self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125) 

3439 self.attr('node', fontname=fontname, shape="box", style=nodestyle, fontcolor=fontcolor, width=0, height=0) 

3440 self.attr('edge', fontname=fontname) 

3441 

3442 for _, function in sorted_iteritems(profile.functions): 

3443 labels = [] 

3444 if function.process is not None: 

3445 labels.append(function.process) 

3446 if function.module is not None: 

3447 labels.append(function.module) 

3448 

3449 if self.strip: 

3450 function_name = function.stripped_name() 

3451 else: 

3452 function_name = function.name 

3453 

3454 # dot can't parse quoted strings longer than YY_BUF_SIZE, which 

3455 # defaults to 16K. But some annotated C++ functions (e.g., boost, 

3456 # https://github.com/jrfonseca/gprof2dot/issues/30) can exceed that 

3457 MAX_FUNCTION_NAME = 4096 

3458 if len(function_name) >= MAX_FUNCTION_NAME: 

3459 sys.stderr.write('warning: truncating function name with %u chars (%s)\n' % (len(function_name), function_name[:32] + '...')) 

3460 function_name = function_name[:MAX_FUNCTION_NAME - 1] + chr(0x2026) 

3461 

3462 if self.wrap: 

3463 function_name = self.wrap_function_name(function_name) 

3464 labels.append(function_name) 

3465 

3466 for event in self.show_function_events: 

3467 if event in function.events: 

3468 label = event.format(function[event]) 

3469 labels.append(label) 

3470 if function.called is not None: 

3471 labels.append("%u%s" % (function.called, MULTIPLICATION_SIGN)) 

3472 

3473 if function.weight is not None: 

3474 weight = function.weight 

3475 else: 

3476 weight = 0.0 

3477 

3478 label = '\n'.join(labels) 

3479 self.node(function.id, 

3480 label = label, 

3481 color = self.color(theme.node_bgcolor(weight)), 

3482 fontcolor = self.color(theme.node_fgcolor(weight)), 

3483 fontsize = "%.2f" % theme.node_fontsize(weight), 

3484 tooltip = function.filename, 

3485 ) 

3486 

3487 for _, call in sorted_iteritems(function.calls): 

3488 callee = profile.functions[call.callee_id] 

3489 

3490 labels = [] 

3491 for event in self.show_edge_events: 

3492 if event in call.events: 

3493 label = event.format(call[event]) 

3494 labels.append(label) 

3495 

3496 if call.weight is not None: 

3497 weight = call.weight 

3498 elif callee.weight is not None: 

3499 weight = callee.weight 

3500 else: 

3501 weight = 0.0 

3502 

3503 label = '\n'.join(labels) 

3504 

3505 self.edge(function.id, call.callee_id, 

3506 label = label, 

3507 color = self.color(theme.edge_color(weight)), 

3508 fontcolor = self.color(theme.edge_color(weight)), 

3509 fontsize = "%.2f" % theme.edge_fontsize(weight), 

3510 penwidth = "%.2f" % theme.edge_penwidth(weight), 

3511 labeldistance = "%.2f" % theme.edge_penwidth(weight), 

3512 arrowsize = "%.2f" % theme.edge_arrowsize(weight), 

3513 ) 

3514 

3515 self.end_graph() 

3516 

3517 def begin_graph(self): 

3518 self.write('digraph {\n') 

3519 # Work-around graphviz bug[1]: unnamed graphs have "%3" tooltip in SVG 

3520 # output. The bug was fixed upstream, but graphviz shipped in recent 

3521 # Linux distros (for example, Ubuntu 24.04) still has the bug. 

3522 # [1] https://gitlab.com/graphviz/graphviz/-/issues/1376 

3523 self.write('\ttooltip=" "\n') 

3524 

3525 def end_graph(self): 

3526 self.write('}\n') 

3527 

3528 def attr(self, what, **attrs): 

3529 self.write("\t") 

3530 self.write(what) 

3531 self.attr_list(attrs) 

3532 self.write(";\n") 

3533 

3534 def node(self, node, **attrs): 

3535 self.write("\t") 

3536 self.node_id(node) 

3537 self.attr_list(attrs) 

3538 self.write(";\n") 

3539 

3540 def edge(self, src, dst, **attrs): 

3541 self.write("\t") 

3542 self.node_id(src) 

3543 self.write(" -> ") 

3544 self.node_id(dst) 

3545 self.attr_list(attrs) 

3546 self.write(";\n") 

3547 

3548 def attr_list(self, attrs): 

3549 if not attrs: 

3550 return 

3551 self.write(' [') 

3552 first = True 

3553 for name, value in sorted_iteritems(attrs): 

3554 if value is None: 

3555 continue 

3556 if first: 

3557 first = False 

3558 else: 

3559 self.write(", ") 

3560 assert isinstance(name, str) 

3561 assert name.isidentifier() 

3562 self.write(name) 

3563 self.write('=') 

3564 self.id(value) 

3565 self.write(']') 

3566 

3567 def node_id(self, id): 

3568 # Node IDs need to be unique (can't be truncated) but dot doesn't allow 

3569 # IDs longer than 16384 characters, so use an hash instead for the huge 

3570 # C++ symbols that can arise, as seen in 

3571 # https://github.com/jrfonseca/gprof2dot/issues/99 

3572 if isinstance(id, str) and len(id) > 1024: 

3573 id = '_' + hashlib.sha1(id.encode('utf-8'), usedforsecurity=False).hexdigest() 

3574 self.id(id) 

3575 

3576 def id(self, id): 

3577 if isinstance(id, (int, float)): 

3578 s = str(id) 

3579 elif isinstance(id, str): 

3580 if id.isalnum() and not id.startswith('0x'): 

3581 s = id 

3582 else: 

3583 s = self.escape(id) 

3584 else: 

3585 raise TypeError 

3586 self.write(s) 

3587 

3588 def color(self, rgb): 

3589 r, g, b = rgb 

3590 

3591 def float2int(f): 

3592 if f <= 0.0: 

3593 return 0 

3594 if f >= 1.0: 

3595 return 255 

3596 return int(255.0*f + 0.5) 

3597 

3598 return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)]) 

3599 

3600 def escape(self, s): 

3601 s = s.replace('\\', r'\\') 

3602 s = s.replace('\n', r'\n') 

3603 s = s.replace('\t', r'\t') 

3604 s = s.replace('"', r'\"') 

3605 return '"' + s + '"' 

3606 

3607 def write(self, s): 

3608 self.fp.write(s) 

3609 

3610 

3611 

3612######################################################################## 

3613# Main program 

3614 

3615 

3616def naturalJoin(values): 

3617 if len(values) >= 2: 

3618 return ', '.join(values[:-1]) + ' or ' + values[-1] 

3619 

3620 else: 

3621 return ''.join(values) 

3622 

3623 

3624def main(argv=sys.argv[1:]): 

3625 """Main program.""" 

3626 

3627 global totalMethod, timeFormat 

3628 

3629 formatNames = list(formats.keys()) 

3630 formatNames.sort() 

3631 

3632 themeNames = list(themes.keys()) 

3633 themeNames.sort() 

3634 

3635 labelNames = list(labels.keys()) 

3636 labelNames.sort() 

3637 

3638 optparser = optparse.OptionParser( 

3639 usage="\n\t%prog [options] [file] ...") 

3640 optparser.add_option( 

3641 '-o', '--output', metavar='FILE', 

3642 type="string", dest="output", 

3643 help="output filename [stdout]") 

3644 optparser.add_option( 

3645 '-n', '--node-thres', metavar='PERCENTAGE', 

3646 type="float", dest="node_thres", default=0.5, 

3647 help="eliminate nodes below this threshold [default: %default]") 

3648 optparser.add_option( 

3649 '-e', '--edge-thres', metavar='PERCENTAGE', 

3650 type="float", dest="edge_thres", default=0.1, 

3651 help="eliminate edges below this threshold [default: %default]") 

3652 optparser.add_option( 

3653 '-f', '--format', 

3654 type="choice", choices=formatNames, 

3655 dest="format", default="prof", 

3656 help="profile format: %s [default: %%default]" % naturalJoin(formatNames)) 

3657 optparser.add_option( 

3658 '--total', 

3659 type="choice", choices=('callratios', 'callstacks'), 

3660 dest="totalMethod", default=totalMethod, 

3661 help="preferred method of calculating total time: callratios or callstacks (currently affects only perf format) [default: %default]") 

3662 optparser.add_option( 

3663 '-c', '--colormap', 

3664 type="choice", choices=themeNames, 

3665 dest="theme", default="color", 

3666 help="color map: %s [default: %%default]" % naturalJoin(themeNames)) 

3667 optparser.add_option( 

3668 '-s', '--strip', 

3669 action="store_true", 

3670 dest="strip", default=False, 

3671 help="strip function parameters, template parameters, and const modifiers from demangled C++ function names") 

3672 optparser.add_option( 

3673 '--color-nodes-by-selftime', 

3674 action="store_true", 

3675 dest="color_nodes_by_selftime", default=False, 

3676 help="color nodes by self time, rather than by total time (sum of self and descendants)") 

3677 optparser.add_option( 

3678 '--colour-nodes-by-selftime', 

3679 action="store_true", 

3680 dest="color_nodes_by_selftime", 

3681 help=optparse.SUPPRESS_HELP) 

3682 optparser.add_option( 

3683 '-w', '--wrap', 

3684 action="store_true", 

3685 dest="wrap", default=False, 

3686 help="wrap function names") 

3687 optparser.add_option( 

3688 '--show-samples', 

3689 action="store_true", 

3690 dest="show_samples", default=False, 

3691 help="show function samples") 

3692 optparser.add_option( 

3693 '--time-format', 

3694 default=timeFormat, 

3695 help="format to use for showing time values [default: %default]") 

3696 optparser.add_option( 

3697 '--node-label', metavar='MEASURE', 

3698 type='choice', choices=labelNames, 

3699 action='append', 

3700 dest='node_labels', 

3701 help="measurements to on show the node (can be specified multiple times): %s [default: %s]" % ( 

3702 naturalJoin(labelNames), ', '.join(defaultLabelNames))) 

3703 # add option to show information on available entries () 

3704 optparser.add_option( 

3705 '--list-functions', 

3706 type="string", 

3707 dest="list_functions", default=None, 

3708 help="""\ 

3709list functions available for selection in -z or -l, requires selector argument 

3710( use '+' to select all). 

3711Recall that the selector argument is used with Unix/Bash globbing/pattern matching, 

3712and that entries are formatted '<pkg>:<linenum>:<function>'. When argument starts 

3713with '%', a dump of all available information is performed for selected entries, 

3714 after removal of leading '%'. 

3715""") 

3716 # add option to create subtree or show paths 

3717 optparser.add_option( 

3718 '-z', '--root', 

3719 type="string", 

3720 dest="root", default="", 

3721 help="prune call graph to show only descendants of specified root function") 

3722 optparser.add_option( 

3723 '-l', '--leaf', 

3724 type="string", 

3725 dest="leaf", default="", 

3726 help="prune call graph to show only ancestors of specified leaf function") 

3727 optparser.add_option( 

3728 '--depth', 

3729 type="int", 

3730 dest="depth", default=-1, 

3731 help="prune call graph to show only descendants or ancestors until specified depth") 

3732 # add a new option to control skew of the colorization curve 

3733 optparser.add_option( 

3734 '--skew', 

3735 type="float", dest="theme_skew", default=1.0, 

3736 help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Values > 1.0 give less variety to lower percentages") 

3737 # add option for filtering by file path 

3738 optparser.add_option( 

3739 '-p', '--path', action="append", 

3740 type="string", dest="filter_paths", 

3741 help="Filter all modules not in a specified path") 

3742 optparser.add_option( 

3743 '--compare', 

3744 action="store_true", 

3745 dest="compare", default=False, 

3746 help="Compare two graphs with almost identical structure. With this option two files should be provided." 

3747 "gprof2dot.py [options] --compare [file1] [file2] ...") 

3748 optparser.add_option( 

3749 '--compare-tolerance', 

3750 type="float", dest="tolerance", default=0.001, 

3751 help="Tolerance threshold for node difference (default=0.001%)." 

3752 "If the difference is below this value the nodes are considered identical.") 

3753 optparser.add_option( 

3754 '--compare-only-slower', 

3755 action="store_true", 

3756 dest="only_slower", default=False, 

3757 help="Display comparison only for function which are slower in second graph.") 

3758 optparser.add_option( 

3759 '--compare-only-faster', 

3760 action="store_true", 

3761 dest="only_faster", default=False, 

3762 help="Display comparison only for function which are faster in second graph.") 

3763 optparser.add_option( 

3764 '--compare-color-by-difference', 

3765 action="store_true", 

3766 dest="color_by_difference", default=False, 

3767 help="Color nodes based on the value of the difference. " 

3768 "Nodes with the largest differences represent the hot spots.") 

3769 (options, args) = optparser.parse_args(argv) 

3770 

3771 if len(args) > 1 and options.format != 'pstats' and not options.compare: 

3772 optparser.error('incorrect number of arguments') 

3773 

3774 try: 

3775 theme = themes[options.theme] 

3776 except KeyError: 

3777 optparser.error('invalid colormap \'%s\'' % options.theme) 

3778 

3779 # set skew on the theme now that it has been picked. 

3780 if options.theme_skew: 

3781 theme.skew = options.theme_skew 

3782 

3783 totalMethod = options.totalMethod 

3784 timeFormat = options.time_format 

3785 

3786 try: 

3787 Format = formats[options.format] 

3788 except KeyError: 

3789 optparser.error('invalid format \'%s\'' % options.format) 

3790 

3791 if Format.stdinInput: 

3792 if not args: 

3793 fp = sys.stdin 

3794 parser = Format(fp) 

3795 elif options.compare: 

3796 fp1 = open(args[0], 'rt', encoding='UTF-8') 

3797 fp2 = open(args[1], 'rt', encoding='UTF-8') 

3798 parser1 = Format(fp1) 

3799 parser2 = Format(fp2) 

3800 else: 

3801 fp = open(args[0], 'rb') 

3802 bom = fp.read(2) 

3803 if bom == codecs.BOM_UTF16_LE: 

3804 # Default on Windows PowerShell (https://github.com/jrfonseca/gprof2dot/issues/88) 

3805 encoding = 'utf-16le' 

3806 else: 

3807 encoding = 'utf-8' 

3808 fp.seek(0) 

3809 fp = io.TextIOWrapper(fp, encoding=encoding) 

3810 parser = Format(fp) 

3811 elif Format.multipleInput: 

3812 if not args: 

3813 optparser.error('at least a file must be specified for %s input' % options.format) 

3814 if options.compare: 

3815 parser1 = Format(args[-2]) 

3816 parser2 = Format(args[-1]) 

3817 else: 

3818 parser = Format(*args) 

3819 else: 

3820 if len(args) != 1: 

3821 optparser.error('exactly one file must be specified for %s input' % options.format) 

3822 parser = Format(args[0]) 

3823 

3824 if options.compare: 

3825 profile1 = parser1.parse() 

3826 profile2 = parser2.parse() 

3827 else: 

3828 profile = parser.parse() 

3829 

3830 if options.output is None: 

3831 output = open(sys.stdout.fileno(), mode='wt', encoding='UTF-8', closefd=False) 

3832 else: 

3833 output = open(options.output, 'wt', encoding='UTF-8') 

3834 

3835 dot = DotWriter(output) 

3836 dot.strip = options.strip 

3837 dot.wrap = options.wrap 

3838 

3839 labelNames = options.node_labels or defaultLabelNames 

3840 dot.show_function_events = [labels[l] for l in labelNames] 

3841 if options.show_samples: 

3842 dot.show_function_events.append(SAMPLES) 

3843 

3844 if options.compare: 

3845 profile1.prune(options.node_thres/100.0, options.edge_thres/100.0, options.filter_paths, 

3846 options.color_nodes_by_selftime) 

3847 profile2.prune(options.node_thres/100.0, options.edge_thres/100.0, options.filter_paths, 

3848 options.color_nodes_by_selftime) 

3849 

3850 if options.root: 

3851 profile1.prune_root(profile1.getFunctionIds(options.root), options.depth) 

3852 profile2.prune_root(profile2.getFunctionIds(options.root), options.depth) 

3853 else: 

3854 profile.prune(options.node_thres/100.0, options.edge_thres/100.0, options.filter_paths, 

3855 options.color_nodes_by_selftime) 

3856 if options.root: 

3857 rootIds = profile.getFunctionIds(options.root) 

3858 if not rootIds: 

3859 sys.stderr.write('root node ' + options.root + ' not found (might already be pruned : try -e0 -n0 flags)\n') 

3860 sys.exit(1) 

3861 profile.prune_root(rootIds, options.depth) 

3862 

3863 if options.list_functions: 

3864 profile.printFunctionIds(selector=options.list_functions) 

3865 sys.exit(0) 

3866 

3867 if options.leaf: 

3868 leafIds = profile.getFunctionIds(options.leaf) 

3869 if not leafIds: 

3870 sys.stderr.write('leaf node ' + options.leaf + ' not found (maybe already pruned : try -e0 -n0 flags)\n') 

3871 sys.exit(1) 

3872 profile.prune_leaf(leafIds, options.depth) 

3873 

3874 if options.compare: 

3875 dot.graphs_compare(profile1, profile2, theme, options) 

3876 else: 

3877 dot.graph(profile, theme) 

3878 

3879 

3880if __name__ == '__main__': 

3881 main()