Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/profiler/profiler_client.py: 61%

18 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Profiler client APIs.""" 

16 

17from tensorflow.python.framework import errors 

18from tensorflow.python.profiler.internal import _pywrap_profiler 

19 

20from tensorflow.python.util.tf_export import tf_export 

21 

# URI scheme prefix that callers may put in front of profiler service addresses.
# It is stripped from the addresses before they are handed to the native
# profiler bindings.
_GRPC_PREFIX = 'grpc://'

23 

24 

@tf_export('profiler.experimental.client.trace', v1=[])
def trace(service_addr,
          logdir,
          duration_ms,
          worker_list='',
          num_tracing_attempts=3,
          options=None):
  """Sends gRPC requests to one or more profiler servers for on-demand profiling.

  The calling thread is blocked until responses have arrived from all servers
  or the deadline expires. Single-host and multi-host profiling are both
  supported on CPU, GPU, and TPU.
  Each server saves its profile to the given TensorBoard log directory (i.e.
  the directory where you save your model checkpoints). Use the TensorBoard
  profile plugin to view the visualization and analysis results.

  Args:
    service_addr: A comma delimited string of gRPC addresses of the workers to
      profile.
      e.g. service_addr='grpc://localhost:6009'
           service_addr='grpc://10.0.0.2:8466,grpc://10.0.0.3:8466'
           service_addr='grpc://localhost:12345,grpc://localhost:23456'
    logdir: Path to save profile data to, typically a TensorBoard log directory.
      This path must be accessible to both the client and server.
      e.g. logdir='gs://your_tb_dir'
    duration_ms: Duration of tracing or monitoring in milliseconds. Must be
      greater than zero.
    worker_list: An optional TPU only configuration. The list of workers to
      profile in the current session.
    num_tracing_attempts: Optional. Automatically retry N times when no trace
      event is collected (default 3).
    options: profiler.experimental.ProfilerOptions namedtuple for miscellaneous
      profiler options. It is converted with `_asdict()`, so a plain dict is
      not accepted.

  Raises:
    InvalidArgumentError: For when arguments fail validation checks.
    UnavailableError: If no trace event was collected.

  Example usage (CPU/GPU):

  ```python
    # Start a profiler server before your model runs.
    tf.profiler.experimental.server.start(6009)
    # (Model code goes here).
    # Send gRPC request to the profiler server to collect a trace of your model.
    tf.profiler.experimental.client.trace('grpc://localhost:6009',
                                          '/nfs/tb_log', 2000)
  ```

  Example usage (Multiple GPUs):

  ```python
    # E.g. your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you
    # would like to schedule start of profiling 1 second from now, for a
    # duration of 2 seconds.
    options = tf.profiler.experimental.ProfilerOptions(delay_ms=1000)
    tf.profiler.experimental.client.trace(
        'grpc://10.0.0.2:8466,grpc://10.0.0.3:8466,grpc://10.0.0.4:8466',
        'gs://your_tb_dir',
        2000,
        options=options)
  ```

  Example usage (TPU):

  ```python
    # Send gRPC request to a TPU worker to collect a trace of your model. A
    # profiler service has been started in the TPU worker at port 8466.
    # E.g. your TPU IP address is 10.0.0.2 and you want to profile for 2
    # seconds.
    tf.profiler.experimental.client.trace('grpc://10.0.0.2:8466',
                                          'gs://your_tb_dir', 2000)
  ```

  Example usage (Multiple TPUs):

  ```python
    # Send gRPC request to a TPU pod to collect a trace of your model on
    # multiple TPUs. A profiler service has been started in all the TPU workers
    # at the port 8466.
    # E.g. your TPU IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you want
    # to profile for 2 seconds.
    tf.profiler.experimental.client.trace(
        'grpc://10.0.0.2:8466',
        'gs://your_tb_dir',
        2000,
        '10.0.0.2:8466,10.0.0.3:8466,10.0.0.4:8466')
  ```

  Launch TensorBoard and point it to the same logdir you provided to this API.

  ```shell
    # logdir can be gs://your_tb_dir as in the above examples.
    $ tensorboard --logdir=/nfs/tb_log
  ```

  Open your browser and go to localhost:6006/#profile to view profiling results.

  """
  if duration_ms <= 0:
    raise errors.InvalidArgumentError(
        None, None, 'duration_ms must be greater than zero.')

  # The native binding receives the options as a plain dict; an omitted
  # `options` becomes an empty dict.
  if options is None:
    option_map = {}
  else:
    option_map = dict(options._asdict())

  # Drop the 'grpc://' scheme from every address before calling the binding.
  bare_addresses = _strip_addresses(service_addr, _GRPC_PREFIX)
  _pywrap_profiler.trace(bare_addresses, logdir, worker_list, True,
                         duration_ms, num_tracing_attempts, option_map)

132 

133 

@tf_export('profiler.experimental.client.monitor', v1=[])
def monitor(service_addr, duration_ms, level=1):
  """Sends gRPC requests to a profiler server to perform on-demand monitoring.

  The monitoring result is a lightweight performance summary of your model
  execution. The caller thread is blocked until the monitoring result has been
  received. This method currently supports Cloud TPU only.

  Args:
    service_addr: gRPC address of profiler service e.g. grpc://10.0.0.2:8466.
    duration_ms: Duration of monitoring in ms.
    level: Choose a monitoring level between 1 and 2 to monitor your job. Level
      2 is more verbose than level 1 and shows more metrics.

  Returns:
    A string of monitoring output.

  Example usage:

  ```python
    # Continuously send gRPC requests to the Cloud TPU to monitor the model
    # execution.

    for query in range(0, 100):
      print(
        tf.profiler.experimental.client.monitor('grpc://10.0.0.2:8466', 1000))
  ```

  """
  # Drop the 'grpc://' scheme from the address before calling the binding.
  bare_address = _strip_prefix(service_addr, _GRPC_PREFIX)
  return _pywrap_profiler.monitor(bare_address, duration_ms, level, True)

165 

166 

167def _strip_prefix(s, prefix): 

168 return s[len(prefix):] if s.startswith(prefix) else s 

169 

170 

def _strip_addresses(addresses, prefix):
  """Removes `prefix` from every entry of a comma delimited address string.

  Whitespace around each entry is trimmed first, so a list written with spaces
  after the commas (e.g. 'grpc://a:1, grpc://b:2') still has the prefix removed
  from every entry rather than only from the first one.

  Args:
    addresses: A comma delimited string of addresses.
    prefix: The prefix to remove from each address that starts with it.

  Returns:
    The addresses re-joined with ',' (no surrounding whitespace), in their
    original order, each without a leading `prefix`.
  """
  return ','.join(
      _strip_prefix(address.strip(), prefix)
      for address in addresses.split(','))