1# Copyright 2023 The gRPC authors.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import annotations
16
17import abc
18import contextlib
19import logging
20import threading
21from typing import (
22 Any,
23 Generator,
24 Generic,
25 List,
26 Optional,
27 Tuple,
28 TypeVar,
29 Union,
30)
31
32from grpc._cython import cygrpc as _cygrpc
33from grpc._typing import ChannelArgumentType
34
# Logger named after this module.
_LOGGER = logging.getLogger(__name__)

# `_channel` is aliased to Any instead of being imported so that string
# annotations such as "_channel._RPCState" have a name to refer to.
_channel = Any  # _channel.py imports this module.

# Type parameters of ObservabilityPlugin.
ClientCallTracerCapsule = TypeVar("ClientCallTracerCapsule")
ServerCallTracerFactoryCapsule = TypeVar("ServerCallTracerFactoryCapsule")

# Re-entrant lock guarding every read and write of _OBSERVABILITY_PLUGIN.
_plugin_lock: threading.RLock = threading.RLock()

# The currently registered plugin, or None when no plugin is registered.
_OBSERVABILITY_PLUGIN: Optional["ObservabilityPlugin"] = None

# RPCs whose method name contains one of these service names are skipped by
# maybe_record_rpc_latency.
_SERVICES_TO_EXCLUDE: List[bytes] = [
    b"google.monitoring.v3.MetricService",
    b"google.devtools.cloudtrace.v2.TraceService",
]
47
48
class ServerCallTracerFactory:
    """Wrapper around the address of a ServerCallTracerFactory.

    Instances of this class are used as the value of the
    grpc.experimental.server_call_tracer_factory channel argument. The
    wrapped address is recovered by converting the instance with `int()`.
    """

    def __init__(self, address):
        """Stores the address that `int(self)` will later report.

        Args:
          address: The address of the ServerCallTracerFactory. It is stored
            as given, without validation or conversion.
        """
        self._address = address

    def __int__(self):
        """Returns the address given at construction time, unchanged."""
        return self._address
61
62
class ObservabilityPlugin(
    Generic[ClientCallTracerCapsule, ServerCallTracerFactoryCapsule],
    metaclass=abc.ABCMeta,
):
    """Abstract base class for observability plugin.

    *This is a semi-private class that was intended for the exclusive use of
     the gRPC team.*

    The ClientCallTracerCapsule and ServerCallTracerFactoryCapsule created by
    this plugin should be injected to gRPC core using observability_init at
    the start of a program, before any channels/servers are built.

    Any future methods added to this interface cannot have the
    @abc.abstractmethod annotation.

    Attributes:
      _tracing_enabled: A bool indicating whether tracing is enabled.
      _stats_enabled: A bool indicating whether stats (metrics) is enabled.
    """

    # Class-level defaults: both features are off until set_tracing() or
    # set_stats() stores a per-instance value.
    _tracing_enabled: bool = False
    _stats_enabled: bool = False

    @abc.abstractmethod
    def create_client_call_tracer(
        self, method_name: bytes, target: bytes
    ) -> ClientCallTracerCapsule:
        """Creates a ClientCallTracerCapsule.

        After the plugin is registered, if tracing or stats is enabled, this
        method will be called after a call was created. The ClientCallTracer
        created by this method will be saved to the call context.

        The ClientCallTracer is an object which implements the
        `grpc_core::ClientCallTracer` interface and is wrapped in a PyCapsule
        using `client_call_tracer` as name.

        Args:
          method_name: The method name of the call in byte format.
          target: The channel target of the call in byte format.

        Returns:
          A PyCapsule which stores a ClientCallTracer object.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def save_trace_context(
        self, trace_id: str, span_id: str, is_sampled: bool
    ) -> None:
        """Saves the trace_id and span_id related to the current span.

        After the plugin is registered, if tracing is enabled, this method
        will be called after the server finished sending the response.

        This method can be used to propagate census context.

        Args:
          trace_id: The identifier for the trace associated with the span as a
            32-character hexadecimal encoded string,
            e.g. 26ed0036f2eff2b7317bccce3e28d01f
          span_id: The identifier for the span as a 16-character hexadecimal
            encoded string, e.g. 113ec879e62583bc
          is_sampled: A bool indicating whether the span is sampled.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def create_server_call_tracer_factory(
        self,
        *,
        xds: bool = False,
    ) -> Optional[ServerCallTracerFactoryCapsule]:
        """Creates a ServerCallTracerFactoryCapsule.

        This method will be called at server initialization time to create a
        ServerCallTracerFactory, which will be registered to gRPC core.

        The ServerCallTracerFactory is an object which implements the
        `grpc_core::ServerCallTracerFactory` interface and is wrapped in a
        PyCapsule using `server_call_tracer_factory` as name.

        Args:
          xds: Whether the server is an xds server. Keyword-only.

        Returns:
          A PyCapsule which stores a ServerCallTracerFactory object, or None
          if the plugin decides not to create a ServerCallTracerFactory.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def record_rpc_latency(
        self, method: str, target: str, rpc_latency: float, status_code: Any
    ) -> None:
        """Records the latency of the RPC.

        After the plugin is registered, if stats is enabled, this method will
        be called at the end of each RPC.

        Args:
          method: The fully-qualified name of the RPC method being invoked.
          target: The target name of the RPC method being invoked.
          rpc_latency: The latency for the RPC, i.e. the time between when the
            client invokes the RPC and when the client receives the status.
            The caller in this module, maybe_record_rpc_latency, passes this
            value in milliseconds.
          status_code: The final status for the RPC. maybe_record_rpc_latency
            forwards the `code` attribute of the RPC state as-is (presumably
            an element of grpc.StatusCode; confirm against the caller).
        """
        raise NotImplementedError()

    def set_tracing(self, enable: bool) -> None:
        """Enables or disables tracing.

        The value is stored on the instance and is what the tracing_enabled
        property reports afterwards.

        Args:
          enable: A bool indicating whether tracing should be enabled.
        """
        self._tracing_enabled = enable

    def set_stats(self, enable: bool) -> None:
        """Enables or disables stats (metrics).

        The value is stored on the instance and is what the stats_enabled
        property reports afterwards.

        Args:
          enable: A bool indicating whether stats should be enabled.
        """
        self._stats_enabled = enable

    def save_registered_method(self, method_name: bytes) -> None:
        """Saves the method name to the registered_method list.

        When exporting metrics, the method name for unregistered methods will
        be replaced with 'other' by default.

        This method is deliberately not marked with @abc.abstractmethod, so
        subclasses can be instantiated without overriding it.

        Args:
          method_name: The method name in bytes.

        Raises:
          NotImplementedError: Always, unless a subclass overrides this
            method; this base class provides no storage.
        """
        raise NotImplementedError()

    @property
    def tracing_enabled(self) -> bool:
        """Whether tracing is currently enabled (False by default)."""
        return self._tracing_enabled

    @property
    def stats_enabled(self) -> bool:
        """Whether stats (metrics) is currently enabled (False by default)."""
        return self._stats_enabled

    @property
    def observability_enabled(self) -> bool:
        """Whether at least one of tracing or stats is enabled."""
        return self.tracing_enabled or self.stats_enabled
214
215
@contextlib.contextmanager
def get_plugin() -> Generator[Optional[ObservabilityPlugin], None, None]:
    """Provides the registered ObservabilityPlugin while holding the plugin lock.

    This is a context manager: `_plugin_lock` is acquired on entry and stays
    held for the whole body of the caller's `with` block, then is released on
    exit. The lock is a `threading.RLock`, so the thread that already holds
    it may enter again.

    Yields:
      The ObservabilityPlugin currently registered with the _observability
      module, or None if no plugin exists at the time of entering the
      context.
    """
    with _plugin_lock:
        registered_plugin = _OBSERVABILITY_PLUGIN
        yield registered_plugin
226
227
def set_plugin(observability_plugin: Optional[ObservabilityPlugin]) -> None:
    """Stores (or clears) the ObservabilityPlugin of the _observability module.

    The check and the assignment both happen while `_plugin_lock` is held.
    Passing None never raises and leaves the module without a plugin.

    Args:
      observability_plugin: The ObservabilityPlugin to save, or None to clear
        the currently registered plugin.

    Raises:
      ValueError: If a plugin is given while another ObservabilityPlugin is
        already registered.
    """
    global _OBSERVABILITY_PLUGIN  # pylint: disable=global-statement
    with _plugin_lock:
        # Only replacing one plugin with another is rejected; clearing with
        # None and the first registration both fall through to the store.
        if observability_plugin and _OBSERVABILITY_PLUGIN:
            raise ValueError("observability_plugin was already set!")
        _OBSERVABILITY_PLUGIN = observability_plugin
244
245
def observability_init(observability_plugin: ObservabilityPlugin) -> None:
    """Initializes observability with the provided ObservabilityPlugin.

    This function has to be called at the start of a program, before any
    channels/servers are built. It registers the plugin through set_plugin.

    Args:
      observability_plugin: The ObservabilityPlugin to use.

    Raises:
      ValueError: If an ObservabilityPlugin was already registered at the
        time of calling this function.
    """
    set_plugin(observability_plugin)
260
261
def observability_deinit() -> None:
    """Clears the observability context.

    The registered ObservabilityPlugin is removed first, then cygrpc is asked
    to clear the ServerCallTracerFactory.

    This function has to be called after exiting the observability context so
    that it is possible to initialize observability again.
    """
    set_plugin(None)
    _cygrpc.clear_server_call_tracer_factory()
271
272
def maybe_record_rpc_latency(state: "_channel._RPCState") -> None:
    """Records the latency of the RPC, if a plugin is registered and stats is enabled.

    This function will be called at the end of each RPC.

    Nothing is recorded for RPCs whose method name contains one of the
    service names in `_SERVICES_TO_EXCLUDE`. This is a substring match on the
    UTF-8 encoded method name, not a prefix match.

    The latency is computed as `state.rpc_end_time - state.rpc_start_time`,
    multiplied by 1000, and handed to the plugin's `record_rpc_latency`
    together with the method, target and code of the RPC. The plugin lock is
    held while the plugin is invoked.

    Args:
      state: A grpc._channel._RPCState object which contains the stats
        related to the RPC. Its `method`, `target`, `code`, `rpc_start_time`
        and `rpc_end_time` attributes are read.
    """
    # TODO(xuanwn): use channel args to exclude those metrics.
    # The method name is the same for every comparison, so encode it once.
    encoded_method = state.method.encode("utf8")
    if any(
        excluded_service in encoded_method
        for excluded_service in _SERVICES_TO_EXCLUDE
    ):
        return
    with get_plugin() as plugin:
        if plugin and plugin.stats_enabled:
            rpc_latency_s = state.rpc_end_time - state.rpc_start_time
            # The plugin receives the latency in milliseconds.
            rpc_latency_ms = rpc_latency_s * 1000
            plugin.record_rpc_latency(
                state.method, state.target, rpc_latency_ms, state.code
            )
293
294
def create_server_call_tracer_factory_option(
    xds: bool,
) -> Union[Tuple[ChannelArgumentType], Tuple[()]]:
    """Builds the server_call_tracer_factory channel argument, if applicable.

    While holding the plugin lock, the registered plugin (if any, and only
    when its stats are enabled) is passed to cygrpc to obtain the address of
    a server call tracer factory. A truthy address is wrapped in a
    ServerCallTracerFactory.

    Args:
      xds: Whether the server is an xds server; forwarded to cygrpc.

    Returns:
      A one-element tuple holding the
      ("grpc.experimental.server_call_tracer_factory", ServerCallTracerFactory)
      pair, or an empty tuple when no plugin is registered, stats is
      disabled, or no factory address was obtained.
    """
    with get_plugin() as plugin:
        if not (plugin and plugin.stats_enabled):
            return ()
        factory_address = _cygrpc.get_server_call_tracer_factory_address(
            plugin, xds
        )
        if not factory_address:
            return ()
        option = (
            "grpc.experimental.server_call_tracer_factory",
            ServerCallTracerFactory(factory_address),
        )
        return (option,)