Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/client/timeline.py: 19%

328 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Timeline visualization for TensorFlow using Chrome Trace Format.""" 

16 

17import collections 

18import copy 

19import json 

20import re 

21 

22# The timeline target is usually imported as part of BUILD target 

23# "platform_test", which also includes the "platform" 

24# dependency. This is why the logging import here is okay. 

25from tensorflow.python.platform import build_info 

26from tensorflow.python.platform import tf_logging as logging 

27 

28 

class AllocationMaximum(collections.namedtuple(
    'AllocationMaximum', ('timestamp', 'num_bytes', 'tensors'))):
  """Records the peak allocation observed for one allocator in the timeline.

  Fields:
    timestamp: `tensorflow::Env::NowMicros()` at the moment the peak occurred.
    num_bytes: Total bytes in use at that moment.
    tensors: The set of tensor names live at that moment.
  """

39 

40 

class StepStatsAnalysis(collections.namedtuple(
    'StepStatsAnalysis', ('chrome_trace', 'allocator_maximums'))):
  """Result bundle produced by `Timeline.analyze_step_stats`.

  Fields:
    chrome_trace: The Chrome-trace analysis (a `_ChromeTraceFormatter`).
    allocator_maximums: Dict mapping allocator name to `AllocationMaximum`.
  """

50 

51 

52class _ChromeTraceFormatter(object): 

53 """A helper class for generating traces in Chrome Trace Format.""" 

54 

55 def __init__(self, show_memory=False): 

56 """Constructs a new Chrome Trace formatter.""" 

57 self._show_memory = show_memory 

58 self._events = [] 

59 self._metadata = [] 

60 

61 def _create_event(self, ph, category, name, pid, tid, timestamp): 

62 """Creates a new Chrome Trace event. 

63 

64 For details of the file format, see: 

65 https://github.com/catapult-project/catapult/blob/master/tracing/README.md 

66 

67 Args: 

68 ph: The type of event - usually a single character. 

69 category: The event category as a string. 

70 name: The event name as a string. 

71 pid: Identifier of the process generating this event as an integer. 

72 tid: Identifier of the thread generating this event as an integer. 

73 timestamp: The timestamp of this event as a long integer. 

74 

75 Returns: 

76 A JSON compatible event object. 

77 """ 

78 event = {} 

79 event['ph'] = ph 

80 event['cat'] = category 

81 event['name'] = name 

82 event['pid'] = pid 

83 event['tid'] = tid 

84 event['ts'] = timestamp 

85 return event 

86 

87 def emit_pid(self, name, pid): 

88 """Adds a process metadata event to the trace. 

89 

90 Args: 

91 name: The process name as a string. 

92 pid: Identifier of the process as an integer. 

93 """ 

94 event = {} 

95 event['name'] = 'process_name' 

96 event['ph'] = 'M' 

97 event['pid'] = pid 

98 event['args'] = {'name': name} 

99 self._metadata.append(event) 

100 

101 def emit_tid(self, name, pid, tid): 

102 """Adds a thread metadata event to the trace. 

103 

104 Args: 

105 name: The thread name as a string. 

106 pid: Identifier of the process as an integer. 

107 tid: Identifier of the thread as an integer. 

108 """ 

109 event = {} 

110 event['name'] = 'thread_name' 

111 event['ph'] = 'M' 

112 event['pid'] = pid 

113 event['tid'] = tid 

114 event['args'] = {'name': name} 

115 self._metadata.append(event) 

116 

117 def emit_region(self, timestamp, duration, pid, tid, category, name, args): 

118 """Adds a region event to the trace. 

119 

120 Args: 

121 timestamp: The start timestamp of this region as a long integer. 

122 duration: The duration of this region as a long integer. 

123 pid: Identifier of the process generating this event as an integer. 

124 tid: Identifier of the thread generating this event as an integer. 

125 category: The event category as a string. 

126 name: The event name as a string. 

127 args: A JSON-compatible dictionary of event arguments. 

128 """ 

129 event = self._create_event('X', category, name, pid, tid, timestamp) 

130 event['dur'] = duration 

131 event['args'] = args 

132 self._events.append(event) 

133 

134 def emit_obj_create(self, category, name, timestamp, pid, tid, object_id): 

135 """Adds an object creation event to the trace. 

136 

137 Args: 

138 category: The event category as a string. 

139 name: The event name as a string. 

140 timestamp: The timestamp of this event as a long integer. 

141 pid: Identifier of the process generating this event as an integer. 

142 tid: Identifier of the thread generating this event as an integer. 

143 object_id: Identifier of the object as an integer. 

144 """ 

145 event = self._create_event('N', category, name, pid, tid, timestamp) 

146 event['id'] = object_id 

147 self._events.append(event) 

148 

149 def emit_obj_delete(self, category, name, timestamp, pid, tid, object_id): 

150 """Adds an object deletion event to the trace. 

151 

152 Args: 

153 category: The event category as a string. 

154 name: The event name as a string. 

155 timestamp: The timestamp of this event as a long integer. 

156 pid: Identifier of the process generating this event as an integer. 

157 tid: Identifier of the thread generating this event as an integer. 

158 object_id: Identifier of the object as an integer. 

159 """ 

160 event = self._create_event('D', category, name, pid, tid, timestamp) 

161 event['id'] = object_id 

162 self._events.append(event) 

163 

164 def emit_obj_snapshot(self, category, name, timestamp, pid, tid, object_id, 

165 snapshot): 

166 """Adds an object snapshot event to the trace. 

167 

168 Args: 

169 category: The event category as a string. 

170 name: The event name as a string. 

171 timestamp: The timestamp of this event as a long integer. 

172 pid: Identifier of the process generating this event as an integer. 

173 tid: Identifier of the thread generating this event as an integer. 

174 object_id: Identifier of the object as an integer. 

175 snapshot: A JSON-compatible representation of the object. 

176 """ 

177 event = self._create_event('O', category, name, pid, tid, timestamp) 

178 event['id'] = object_id 

179 event['args'] = {'snapshot': snapshot} 

180 self._events.append(event) 

181 

182 def emit_flow_start(self, name, timestamp, pid, tid, flow_id): 

183 """Adds a flow start event to the trace. 

184 

185 When matched with a flow end event (with the same 'flow_id') this will 

186 cause the trace viewer to draw an arrow between the start and end events. 

187 

188 Args: 

189 name: The event name as a string. 

190 timestamp: The timestamp of this event as a long integer. 

191 pid: Identifier of the process generating this event as an integer. 

192 tid: Identifier of the thread generating this event as an integer. 

193 flow_id: Identifier of the flow as an integer. 

194 """ 

195 event = self._create_event('s', 'DataFlow', name, pid, tid, timestamp) 

196 event['id'] = flow_id 

197 self._events.append(event) 

198 

199 def emit_flow_end(self, name, timestamp, pid, tid, flow_id): 

200 """Adds a flow end event to the trace. 

201 

202 When matched with a flow start event (with the same 'flow_id') this will 

203 cause the trace viewer to draw an arrow between the start and end events. 

204 

205 Args: 

206 name: The event name as a string. 

207 timestamp: The timestamp of this event as a long integer. 

208 pid: Identifier of the process generating this event as an integer. 

209 tid: Identifier of the thread generating this event as an integer. 

210 flow_id: Identifier of the flow as an integer. 

211 """ 

212 event = self._create_event('t', 'DataFlow', name, pid, tid, timestamp) 

213 event['id'] = flow_id 

214 self._events.append(event) 

215 

216 def emit_counter(self, category, name, pid, timestamp, counter, value): 

217 """Emits a record for a single counter. 

218 

219 Args: 

220 category: The event category as a string. 

221 name: The event name as a string. 

222 pid: Identifier of the process generating this event as an integer. 

223 timestamp: The timestamp of this event as a long integer. 

224 counter: Name of the counter as a string. 

225 value: Value of the counter as an integer. 

226 """ 

227 event = self._create_event('C', category, name, pid, 0, timestamp) 

228 event['args'] = {counter: value} 

229 self._events.append(event) 

230 

231 def emit_counters(self, category, name, pid, timestamp, counters): 

232 """Emits a counter record for the dictionary 'counters'. 

233 

234 Args: 

235 category: The event category as a string. 

236 name: The event name as a string. 

237 pid: Identifier of the process generating this event as an integer. 

238 timestamp: The timestamp of this event as a long integer. 

239 counters: Dictionary of counter values. 

240 """ 

241 event = self._create_event('C', category, name, pid, 0, timestamp) 

242 event['args'] = counters.copy() 

243 self._events.append(event) 

244 

245 def format_to_string(self, pretty=False): 

246 """Formats the chrome trace to a string. 

247 

248 Args: 

249 pretty: (Optional.) If True, produce human-readable JSON output. 

250 

251 Returns: 

252 A JSON-formatted string in Chrome Trace format. 

253 """ 

254 trace = {} 

255 trace['traceEvents'] = self._metadata + self._events 

256 if pretty: 

257 return json.dumps(trace, indent=4, separators=(',', ': ')) 

258 else: 

259 return json.dumps(trace, separators=(',', ':')) 

260 

261 

262class _TensorTracker(object): 

263 """An internal class to track the lifetime of a Tensor.""" 

264 

265 def __init__(self, name, object_id, timestamp, pid, allocator, num_bytes): 

266 """Creates an object to track tensor references. 

267 

268 This class is not thread safe and is intended only for internal use by 

269 the 'Timeline' class in this file. 

270 

271 Args: 

272 name: The name of the Tensor as a string. 

273 object_id: Chrome Trace object identifier assigned for this Tensor. 

274 timestamp: The creation timestamp of this event as a long integer. 

275 pid: Process identifier of the associated device, as an integer. 

276 allocator: Name of the allocator used to create the Tensor. 

277 num_bytes: Number of bytes allocated (long integer). 

278 

279 Returns: 

280 A 'TensorTracker' object. 

281 """ 

282 self._name = name 

283 self._pid = pid 

284 self._object_id = object_id 

285 self._create_time = timestamp 

286 self._allocator = allocator 

287 self._num_bytes = num_bytes 

288 self._ref_times = [] 

289 self._unref_times = [] 

290 

291 @property 

292 def name(self): 

293 """Name of this tensor.""" 

294 return self._name 

295 

296 @property 

297 def pid(self): 

298 """ID of the process which created this tensor (an integer).""" 

299 return self._pid 

300 

301 @property 

302 def create_time(self): 

303 """Timestamp when this tensor was created (long integer).""" 

304 return self._create_time 

305 

306 @property 

307 def object_id(self): 

308 """Returns the object identifier of this tensor (integer).""" 

309 return self._object_id 

310 

311 @property 

312 def num_bytes(self): 

313 """Size of this tensor in bytes (long integer).""" 

314 return self._num_bytes 

315 

316 @property 

317 def allocator(self): 

318 """Name of the allocator used to create this tensor (string).""" 

319 return self._allocator 

320 

321 @property 

322 def last_unref(self): 

323 """Last unreference timestamp of this tensor (long integer).""" 

324 return max(self._unref_times) 

325 

326 def add_ref(self, timestamp): 

327 """Adds a reference to this tensor with the specified timestamp. 

328 

329 Args: 

330 timestamp: Timestamp of object reference as an integer. 

331 """ 

332 self._ref_times.append(timestamp) 

333 

334 def add_unref(self, timestamp): 

335 """Adds an unref to this tensor with the specified timestamp. 

336 

337 Args: 

338 timestamp: Timestamp of object unreference as an integer. 

339 """ 

340 self._unref_times.append(timestamp) 

341 

342 

343class Timeline(object): 

344 """A class for visualizing execution timelines of TensorFlow steps.""" 

345 

346 def __init__(self, step_stats, graph=None): 

347 """Constructs a new Timeline. 

348 

349 A 'Timeline' is used for visualizing the execution of a TensorFlow 

350 computation. It shows the timings and concurrency of execution at 

351 the granularity of TensorFlow Ops. 

352 This class is not thread safe. 

353 

354 Args: 

355 step_stats: The 'StepStats' proto recording execution times. 

356 graph: (Optional) The 'Graph' that was executed. 

357 """ 

358 

359 self._origin_step_stats = step_stats 

360 self._step_stats = None 

361 self._graph = graph 

362 self._chrome_trace = _ChromeTraceFormatter() 

363 self._next_pid = 0 

364 self._device_pids = {} # device name -> pid for compute activity. 

365 self._tensor_pids = {} # device name -> pid for tensors. 

366 self._tensors = {} # tensor_name -> TensorTracker 

367 self._next_flow_id = 0 

368 self._flow_starts = {} # tensor_name -> (timestamp, pid, tid) 

369 self._alloc_times = {} # tensor_name -> ( time, allocator, size ) 

370 self._allocator_maximums = {} # allocator name => maximum bytes long 

371 

372 def _alloc_pid(self): 

373 """Allocate a process Id.""" 

374 pid = self._next_pid 

375 self._next_pid += 1 

376 return pid 

377 

378 def _alloc_flow_id(self): 

379 """Allocate a flow Id.""" 

380 flow_id = self._next_flow_id 

381 self._next_flow_id += 1 

382 return flow_id 

383 

384 def _parse_op_label(self, label): 

385 """Parses the fields in a node timeline label.""" 

386 # Expects labels of the form: name = op(arg, arg, ...). 

387 match = re.match(r'(.*) = (.*)\((.*)\)', label) 

388 if match is None: 

389 return 'unknown', 'unknown', [] 

390 nn, op, inputs = match.groups() 

391 if not inputs: 

392 inputs = [] 

393 else: 

394 inputs = inputs.split(', ') 

395 return nn, op, inputs 

396 

397 def _parse_kernel_label(self, label, node_name): 

398 """Parses the fields in a node timeline label.""" 

399 # Expects labels of the form: retval (arg) detail @@annotation 

400 start = label.find('@@') 

401 end = label.find('#') 

402 if start >= 0 and end >= 0 and start + 2 < end: 

403 node_name = label[start + 2:end] 

404 # Node names should always have the form 'name:op'. 

405 fields = node_name.split(':') + ['unknown'] 

406 name, op = fields[:2] 

407 return name, op 

408 

409 def _assign_lanes(self): 

410 """Assigns non-overlapping lanes for the activities on each device.""" 

411 for device_stats in self._step_stats.dev_stats: 

412 # TODO(pbar): Genuine thread IDs in NodeExecStats might be helpful. 

413 lanes = [0] 

414 for ns in device_stats.node_stats: 

415 l = -1 

416 for (i, lts) in enumerate(lanes): 

417 if ns.all_start_micros > lts: 

418 l = i 

419 lanes[l] = ns.all_start_micros + ns.all_end_rel_micros 

420 break 

421 if l < 0: 

422 l = len(lanes) 

423 lanes.append(ns.all_start_micros + ns.all_end_rel_micros) 

424 ns.thread_id = l 

425 

426 def _emit_op(self, nodestats, pid, is_gputrace): 

427 """Generates a Chrome Trace event to show Op execution. 

428 

429 Args: 

430 nodestats: The 'NodeExecStats' proto recording op execution. 

431 pid: The pid assigned for the device where this op ran. 

432 is_gputrace: If True then this op came from the GPUTracer. 

433 """ 

434 node_name = nodestats.node_name 

435 start = nodestats.all_start_micros 

436 duration = nodestats.all_end_rel_micros 

437 tid = nodestats.thread_id 

438 inputs = [] 

439 if is_gputrace: 

440 node_name, op = self._parse_kernel_label(nodestats.timeline_label, 

441 node_name) 

442 elif node_name == 'RecvTensor': 

443 # RPC tracing does not use the standard timeline_label format. 

444 op = 'RecvTensor' 

445 else: 

446 _, op, inputs = self._parse_op_label(nodestats.timeline_label) 

447 args = {'name': node_name, 'op': op} 

448 if build_info.build_info['is_rocm_build']: 

449 args['kernel'] = nodestats.timeline_label.split('@@')[0] 

450 for i, iname in enumerate(inputs): 

451 args['input%d' % i] = iname 

452 self._chrome_trace.emit_region(start, duration, pid, tid, 'Op', op, args) 

453 

454 def _emit_tensor_snapshot(self, tensor, timestamp, pid, tid, value): 

455 """Generate Chrome Trace snapshot event for a computed Tensor. 

456 

457 Args: 

458 tensor: A 'TensorTracker' object. 

459 timestamp: The timestamp of this snapshot as a long integer. 

460 pid: The pid assigned for showing the device where this op ran. 

461 tid: The tid of the thread computing the tensor snapshot. 

462 value: A JSON-compliant snapshot of the object. 

463 """ 

464 desc = str(value.tensor_description).replace('"', '') 

465 snapshot = {'tensor_description': desc} 

466 self._chrome_trace.emit_obj_snapshot('Tensor', tensor.name, timestamp, pid, 

467 tid, tensor.object_id, snapshot) 

468 

469 def _produce_tensor(self, name, timestamp, tensors_pid, allocator, num_bytes): 

470 object_id = len(self._tensors) 

471 tensor = _TensorTracker(name, object_id, timestamp, tensors_pid, allocator, 

472 num_bytes) 

473 self._tensors[name] = tensor 

474 return tensor 

475 

476 def _is_gputrace_device(self, device_name): 

477 """Returns true if this device is part of the GPUTracer logging.""" 

478 return '/stream:' in device_name or '/memcpy' in device_name 

479 

480 def _allocate_pids(self): 

481 """Allocate fake process ids for each device in the StepStats.""" 

482 self._allocators_pid = self._alloc_pid() 

483 self._chrome_trace.emit_pid('Allocators', self._allocators_pid) 

484 

485 # Add processes in the Chrome trace to show compute and data activity. 

486 for dev_stats in self._step_stats.dev_stats: 

487 device_pid = self._alloc_pid() 

488 self._device_pids[dev_stats.device] = device_pid 

489 tensors_pid = self._alloc_pid() 

490 self._tensor_pids[dev_stats.device] = tensors_pid 

491 self._chrome_trace.emit_pid(dev_stats.device + ' Compute', device_pid) 

492 self._chrome_trace.emit_pid(dev_stats.device + ' Tensors', tensors_pid) 

493 

494 def _analyze_tensors(self, show_memory): 

495 """Analyze tensor references to track dataflow.""" 

496 for dev_stats in self._step_stats.dev_stats: 

497 device_pid = self._device_pids[dev_stats.device] 

498 tensors_pid = self._tensor_pids[dev_stats.device] 

499 for node_stats in dev_stats.node_stats: 

500 tid = node_stats.thread_id 

501 node_name = node_stats.node_name 

502 start_time = node_stats.all_start_micros 

503 end_time = node_stats.all_start_micros + node_stats.all_end_rel_micros 

504 for index, output in enumerate(node_stats.output): 

505 if index: 

506 output_name = '%s:%d' % (node_name, index) 

507 else: 

508 output_name = node_name 

509 

510 allocation = output.tensor_description.allocation_description 

511 num_bytes = allocation.requested_bytes 

512 allocator_name = allocation.allocator_name 

513 tensor = self._produce_tensor(output_name, start_time, tensors_pid, 

514 allocator_name, num_bytes) 

515 tensor.add_ref(start_time) 

516 tensor.add_unref(end_time) 

517 self._flow_starts[output_name] = (end_time, device_pid, tid) 

518 

519 if show_memory: 

520 self._chrome_trace.emit_obj_create('Tensor', output_name, 

521 start_time, tensors_pid, tid, 

522 tensor.object_id) 

523 self._emit_tensor_snapshot(tensor, end_time - 1, tensors_pid, tid, 

524 output) 

525 

526 def _show_compute(self, show_dataflow): 

527 """Visualize the computation activity.""" 

528 for dev_stats in self._step_stats.dev_stats: 

529 device_name = dev_stats.device 

530 device_pid = self._device_pids[device_name] 

531 is_gputrace = self._is_gputrace_device(device_name) 

532 

533 for node_stats in dev_stats.node_stats: 

534 tid = node_stats.thread_id 

535 start_time = node_stats.all_start_micros 

536 end_time = node_stats.all_start_micros + node_stats.all_end_rel_micros 

537 self._emit_op(node_stats, device_pid, is_gputrace) 

538 

539 if is_gputrace or node_stats.node_name == 'RecvTensor': 

540 continue 

541 

542 _, _, inputs = self._parse_op_label(node_stats.timeline_label) 

543 for input_name in inputs: 

544 if input_name not in self._tensors: 

545 # This can happen when partitioning has inserted a Send/Recv. 

546 # We remove the numeric suffix so that the dataflow appears to 

547 # come from the original node. Ideally, the StepStats would 

548 # contain logging for the Send and Recv nodes. 

549 index = input_name.rfind('/_') 

550 if index > 0: 

551 input_name = input_name[:index] 

552 

553 if input_name in self._tensors: 

554 tensor = self._tensors[input_name] 

555 tensor.add_ref(start_time) 

556 tensor.add_unref(end_time - 1) 

557 

558 if show_dataflow: 

559 # We use a different flow ID for every graph edge. 

560 create_time, create_pid, create_tid = self._flow_starts[ 

561 input_name] 

562 # Don't add flows when producer and consumer ops are on the same 

563 # pid/tid since the horizontal arrows clutter the visualization. 

564 if create_pid != device_pid or create_tid != tid: 

565 flow_id = self._alloc_flow_id() 

566 self._chrome_trace.emit_flow_start(input_name, create_time, 

567 create_pid, create_tid, 

568 flow_id) 

569 self._chrome_trace.emit_flow_end(input_name, start_time, 

570 device_pid, tid, flow_id) 

571 else: 

572 logging.vlog(1, 'Can\'t find tensor %s - removed by CSE?', 

573 input_name) 

574 

575 def _show_memory_counters(self): 

576 """Produce a counter series for each memory allocator.""" 

577 # Iterate over all tensor trackers to build a list of allocations and 

578 # frees for each allocator. Then sort the lists and emit a cumulative 

579 # counter series for each allocator. 

580 allocations = {} 

581 for name in self._tensors: 

582 tensor = self._tensors[name] 

583 self._chrome_trace.emit_obj_delete('Tensor', name, tensor.last_unref, 

584 tensor.pid, 0, tensor.object_id) 

585 allocator = tensor.allocator 

586 if allocator not in allocations: 

587 allocations[allocator] = [] 

588 num_bytes = tensor.num_bytes 

589 allocations[allocator].append((tensor.create_time, num_bytes, name)) 

590 allocations[allocator].append((tensor.last_unref, -num_bytes, name)) 

591 

592 alloc_maxes = {} 

593 

594 # Generate a counter series showing total allocations for each allocator. 

595 for allocator in allocations: 

596 alloc_list = allocations[allocator] 

597 alloc_list.sort() 

598 total_bytes = 0 

599 alloc_tensor_set = set() 

600 alloc_maxes[allocator] = AllocationMaximum( 

601 timestamp=0, num_bytes=0, tensors=set()) 

602 for time, num_bytes, name in sorted( 

603 alloc_list, key=lambda allocation: allocation[0]): 

604 total_bytes += num_bytes 

605 if num_bytes < 0: 

606 alloc_tensor_set.discard(name) 

607 else: 

608 alloc_tensor_set.add(name) 

609 

610 if total_bytes > alloc_maxes[allocator].num_bytes: 

611 alloc_maxes[allocator] = AllocationMaximum( 

612 timestamp=time, 

613 num_bytes=total_bytes, 

614 tensors=copy.deepcopy(alloc_tensor_set)) 

615 

616 self._chrome_trace.emit_counter('Memory', allocator, 

617 self._allocators_pid, time, allocator, 

618 total_bytes) 

619 self._allocator_maximums = alloc_maxes 

620 

621 def _preprocess_op_time(self, op_time): 

622 """Update the start and end time of ops in step stats. 

623 

624 Args: 

625 op_time: How the execution time of op is shown in timeline. Possible values 

626 are "schedule", "gpu" and "all". "schedule" will show op from the time it 

627 is scheduled to the end of the scheduling. Notice by the end of its 

628 scheduling its async kernels may not start yet. It is shown using the 

629 default value from step_stats. "gpu" will show op with the execution time 

630 of its kernels on GPU. "all" will show op from the start of its scheduling 

631 to the end of its last kernel. 

632 """ 

633 if op_time == 'schedule': 

634 self._step_stats = self._origin_step_stats 

635 return 

636 self._step_stats = copy.deepcopy(self._origin_step_stats) 

637 # Separate job task and gpu tracer stream 

638 stream_all_stats = [] 

639 job_stats = [] 

640 for stats in self._step_stats.dev_stats: 

641 if '/stream:all' in stats.device: 

642 stream_all_stats.append(stats) 

643 elif '/job' in stats.device: 

644 job_stats.append(stats) 

645 

646 # Record the start time of the first kernel and the end time of 

647 # the last gpu kernel for all ops. 

648 op_gpu_start = {} 

649 op_gpu_end = {} 

650 for stats in stream_all_stats: 

651 for kernel in stats.node_stats: 

652 name, _ = self._parse_kernel_label(kernel.timeline_label, 

653 kernel.node_name) 

654 start = kernel.all_start_micros 

655 end = kernel.all_start_micros + kernel.all_end_rel_micros 

656 if name in op_gpu_start: 

657 op_gpu_start[name] = min(op_gpu_start[name], start) 

658 op_gpu_end[name] = max(op_gpu_end[name], end) 

659 else: 

660 op_gpu_start[name] = start 

661 op_gpu_end[name] = end 

662 

663 # Update the start and end time of each op according to the op_time 

664 for stats in job_stats: 

665 for op in stats.node_stats: 

666 if op.node_name in op_gpu_start: 

667 end = max(op_gpu_end[op.node_name], 

668 op.all_start_micros + op.all_end_rel_micros) 

669 if op_time == 'gpu': 

670 op.all_start_micros = op_gpu_start[op.node_name] 

671 op.all_end_rel_micros = end - op.all_start_micros 

672 

673 def analyze_step_stats(self, 

674 show_dataflow=True, 

675 show_memory=True, 

676 op_time='schedule'): 

677 """Analyze the step stats and format it into Chrome Trace Format. 

678 

679 Args: 

680 show_dataflow: (Optional.) If True, add flow events to the trace 

681 connecting producers and consumers of tensors. 

682 show_memory: (Optional.) If True, add object snapshot events to the trace 

683 showing the sizes and lifetimes of tensors. 

684 op_time: (Optional.) How the execution time of op is shown in timeline. 

685 Possible values are "schedule", "gpu" and "all". "schedule" will show op 

686 from the time it is scheduled to the end of the scheduling. Notice by 

687 the end of its scheduling its async kernels may not start yet. It is 

688 shown using the default value from step_stats. "gpu" will show op with 

689 the execution time of its kernels on GPU. "all" will show op from the 

690 start of its scheduling to the end of its last kernel. 

691 

692 Returns: 

693 A 'StepStatsAnalysis' object. 

694 """ 

695 self._preprocess_op_time(op_time) 

696 self._allocate_pids() 

697 self._assign_lanes() 

698 self._analyze_tensors(show_memory) 

699 self._show_compute(show_dataflow) 

700 if show_memory: 

701 self._show_memory_counters() 

702 return StepStatsAnalysis( 

703 chrome_trace=self._chrome_trace, 

704 allocator_maximums=self._allocator_maximums) 

705 

706 def generate_chrome_trace_format(self, 

707 show_dataflow=True, 

708 show_memory=False, 

709 op_time='schedule'): 

710 """Produces a trace in Chrome Trace Format. 

711 

712 Args: 

713 show_dataflow: (Optional.) If True, add flow events to the trace 

714 connecting producers and consumers of tensors. 

715 show_memory: (Optional.) If True, add object snapshot events to the trace 

716 showing the sizes and lifetimes of tensors. 

717 op_time: (Optional.) How the execution time of op is shown in timeline. 

718 Possible values are "schedule", "gpu" and "all". 

719 "schedule" will show op from the time it is scheduled to the end of 

720 the scheduling. 

721 Notice by the end of its scheduling its async kernels may not start 

722 yet. It is shown using the default value from step_stats. 

723 "gpu" will show op with the execution time of its kernels on GPU. 

724 "all" will show op from the start of its scheduling to the end of 

725 its last kernel. 

726 

727 Returns: 

728 A JSON formatted string in Chrome Trace format. 

729 """ 

730 step_stats_analysis = self.analyze_step_stats( 

731 show_dataflow=show_dataflow, show_memory=show_memory, op_time=op_time) 

732 

733 return step_stats_analysis.chrome_trace.format_to_string(pretty=True)