1# Copyright 2016 Google LLC All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Helper functions for logging handlers."""
16
17import math
18import json
19import re
20import warnings
21
22try:
23 import flask
24except ImportError: # pragma: NO COVER
25 flask = None
26
27import opentelemetry.trace
28
29from google.cloud.logging_v2.handlers.middleware.request import _get_django_request
30
# Keys used with a Django request's ``META`` mapping.
# NOTE(review): _DJANGO_CONTENT_LENGTH, _DJANGO_REMOTE_ADDR_HEADER and
# _DJANGO_REFERER_HEADER are not referenced by the functions visible in this
# module; presumably they are kept for other importers - confirm before removing.
_DJANGO_CONTENT_LENGTH = "CONTENT_LENGTH"
_DJANGO_XCLOUD_TRACE_HEADER = "HTTP_X_CLOUD_TRACE_CONTEXT"
_DJANGO_TRACEPARENT = "HTTP_TRACEPARENT"
_DJANGO_USERAGENT_HEADER = "HTTP_USER_AGENT"
_DJANGO_REMOTE_ADDR_HEADER = "REMOTE_ADDR"
_DJANGO_REFERER_HEADER = "HTTP_REFERER"
# Header names looked up in ``flask.request.headers``.
_FLASK_XCLOUD_TRACE_HEADER = "X_CLOUD_TRACE_CONTEXT"
_FLASK_TRACEPARENT = "TRACEPARENT"
# Key for the request protocol; read from ``flask.request.environ`` and from
# Django's ``request.META``.
_PROTOCOL_HEADER = "SERVER_PROTOCOL"
40
41
def format_stackdriver_json(record, message):
    """Format a LogRecord in the Stackdriver fluentd JSON format.

    DEPRECATED: use StructuredLogHandler to write formatted logs to standard
    out instead.

    Args:
        record (logging.LogRecord): the record being logged. Only its
            ``created``, ``thread`` and ``levelname`` attributes are read.
        message (str): the already formatted log message.

    Returns:
        str: JSON str to be written to the log file.
    """
    # math.modf returns (fractional part, integral part), in that order.
    subsecond, second = math.modf(record.created)

    payload = {
        "message": message,
        "timestamp": {"seconds": int(second), "nanos": int(subsecond * 1e9)},
        "thread": record.thread,
        "severity": record.levelname,
    }
    # stacklevel=2 attributes the warning to the code calling this helper
    # rather than to this module, so the deprecation is reported at the call
    # site that actually needs to change.
    warnings.warn(
        "format_stackdriver_json is deprecated. Use StructuredLogHandler instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return json.dumps(payload, ensure_ascii=False)
63
64
def get_request_data_from_flask():
    """Collect http_request and trace information from the current Flask request.

    The W3C ``traceparent`` header is consulted first; the
    ``X-Cloud-Trace-Context`` header is only used when no trace id could be
    extracted from it.

    Returns:
        Tuple[Optional[dict], Optional[str], Optional[str], bool]:
            The http_request dict, trace_id, span_id and trace_sampled for
            the request. ``(None, None, None, False)`` is returned when flask
            is unavailable or ``flask.request`` is falsy.
    """
    # Nothing to report when flask could not be imported or no request is set.
    if flask is None or not flask.request:
        return None, None, None, False

    http_request = dict(
        requestMethod=flask.request.method,
        requestUrl=flask.request.url,
        userAgent=flask.request.user_agent.string,
        protocol=flask.request.environ.get(_PROTOCOL_HEADER),
    )

    # Prefer the W3C traceparent header.
    traceparent = flask.request.headers.get(_FLASK_TRACEPARENT)
    trace_id, span_id, trace_sampled = _parse_trace_parent(traceparent)
    if trace_id is not None:
        return http_request, trace_id, span_id, trace_sampled

    # No usable traceparent: fall back to the X-Cloud-Trace-Context header.
    xcloud = flask.request.headers.get(_FLASK_XCLOUD_TRACE_HEADER)
    trace_id, span_id, trace_sampled = _parse_xcloud_trace(xcloud)
    return http_request, trace_id, span_id, trace_sampled
94
95
def get_request_data_from_django():
    """Collect http_request and trace information from the current Django request.

    The W3C ``traceparent`` header is consulted first; the
    ``X-Cloud-Trace-Context`` header is only used when no trace id could be
    extracted from it.

    Returns:
        Tuple[Optional[dict], Optional[str], Optional[str], bool]:
            The http_request dict, trace_id, span_id and trace_sampled for
            the request. ``(None, None, None, False)`` is returned when no
            Django request is found.
    """
    django_request = _get_django_request()
    if django_request is None:
        return None, None, None, False

    # Django can raise django.core.exceptions.DisallowedHost here for a
    # malformed HTTP_HOST header. Django modules are deliberately not imported,
    # so the failure is caught broadly and the URL is reported as None.
    try:
        absolute_url = django_request.build_absolute_uri()
    except Exception:
        absolute_url = None

    http_request = dict(
        requestMethod=django_request.method,
        requestUrl=absolute_url,
        userAgent=django_request.META.get(_DJANGO_USERAGENT_HEADER),
        protocol=django_request.META.get(_PROTOCOL_HEADER),
    )

    # Prefer the W3C traceparent header.
    traceparent = django_request.META.get(_DJANGO_TRACEPARENT)
    trace_id, span_id, trace_sampled = _parse_trace_parent(traceparent)
    if trace_id is not None:
        return http_request, trace_id, span_id, trace_sampled

    # No usable traceparent: fall back to the X-Cloud-Trace-Context header.
    xcloud = django_request.META.get(_DJANGO_XCLOUD_TRACE_HEADER)
    trace_id, span_id, trace_sampled = _parse_xcloud_trace(xcloud)
    return http_request, trace_id, span_id, trace_sampled
134
135
136def _parse_trace_parent(header):
137 """Given a w3 traceparent header, extract the trace and span ids.
138 For more information see https://www.w3.org/TR/trace-context/
139
140 Args:
141 header (str): the string extracted from the traceparent header
142 example: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01
143 Returns:
144 Tuple[Optional[dict], Optional[str], bool]:
145 The trace_id, span_id and trace_sampled extracted from the header
146 Each field will be None if header can't be parsed in expected format.
147 """
148 trace_id = span_id = None
149 trace_sampled = False
150 # see https://www.w3.org/TR/trace-context/ for W3C traceparent format
151 if header:
152 try:
153 VERSION_PART = r"(?!ff)[a-f\d]{2}"
154 TRACE_ID_PART = r"(?![0]{32})[a-f\d]{32}"
155 PARENT_ID_PART = r"(?![0]{16})[a-f\d]{16}"
156 FLAGS_PART = r"[a-f\d]{2}"
157 regex = f"^\\s?({VERSION_PART})-({TRACE_ID_PART})-({PARENT_ID_PART})-({FLAGS_PART})(-.*)?\\s?$"
158 match = re.match(regex, header)
159 trace_id = match.group(2)
160 span_id = match.group(3)
161 # trace-flag component is an 8-bit bit field. Read as an int
162 int_flag = int(match.group(4), 16)
163 # trace sampled is set if the right-most bit in flag component is set
164 trace_sampled = bool(int_flag & 1)
165 except (IndexError, AttributeError):
166 # could not parse header as expected. Return None
167 pass
168 return trace_id, span_id, trace_sampled
169
170
171def _parse_xcloud_trace(header):
172 """Given an X_CLOUD_TRACE header, extract the trace and span ids.
173
174 Args:
175 header (str): the string extracted from the X_CLOUD_TRACE header
176 Returns:
177 Tuple[Optional[str], Optional[str], bool]:
178 The trace_id, span_id and trace_sampled extracted from the header
179 Each field will be None if not found.
180 """
181 trace_id = span_id = None
182 trace_sampled = False
183
184 # As per the format described at https://cloud.google.com/trace/docs/trace-context#legacy-http-header
185 # "X-Cloud-Trace-Context: TRACE_ID[/SPAN_ID][;o=OPTIONS]"
186 # for example:
187 # "X-Cloud-Trace-Context: 105445aa7843bc8bf206b12000100000/1;o=1"
188 #
189 # We expect:
190 # * trace_id (optional, 128-bit hex string): "105445aa7843bc8bf206b12000100000"
191 # * span_id (optional, 16-bit hex string): "0000000000000001" (needs to be converted into 16 bit hex string)
192 # * trace_sampled (optional, bool): true
193 if header:
194 try:
195 regex = r"([\w-]+)?(\/?([\w-]+))?(;?o=(\d))?"
196 match = re.match(regex, header)
197 trace_id = match.group(1)
198 span_id = match.group(3)
199 trace_sampled = match.group(5) == "1"
200
201 # Convert the span ID to 16-bit hexadecimal instead of decimal
202 try:
203 span_id_int = int(span_id)
204 if span_id_int > 0 and span_id_int < 2**64:
205 span_id = f"{span_id_int:016x}"
206 else:
207 span_id = None
208 except (ValueError, TypeError):
209 span_id = None
210
211 except IndexError:
212 pass
213 return trace_id, span_id, trace_sampled
214
215
def _retrieve_current_open_telemetry_span():
    """Read trace id, span id and sampling flag from the current OpenTelemetry span.

    Returns:
        Tuple[Optional[str], Optional[str], bool]:
            The formatted trace_id and span_id plus the trace_sampled flag of
            the current OpenTelemetry span. ``(None, None, False)`` is
            returned when the current span is the invalid span.
    """
    current_span = opentelemetry.trace.get_current_span()
    if current_span != opentelemetry.trace.span.INVALID_SPAN:
        span_context = current_span.get_span_context()
        return (
            opentelemetry.trace.format_trace_id(span_context.trace_id),
            opentelemetry.trace.format_span_id(span_context.span_id),
            span_context.trace_flags.sampled,
        )

    # No valid span is active.
    return None, None, False
236
237
def get_request_data():
    """Gather http_request and trace data from the supported sources.

    HTTP request data comes from a supported web framework (Django is tried
    first, then Flask). Trace data comes from the current OpenTelemetry span
    when one provides a trace id; otherwise the values parsed from the
    request headers (Traceparent, then XCTC) are used. Because the two are
    decoupled, the result can combine the framework's http_request with
    OpenTelemetry trace/span data even if the request headers carry trace
    data of their own.

    Returns:
        Tuple[Optional[dict], Optional[str], Optional[str], bool]:
            Data related to the current http request, trace_id, span_id, and
            trace_sampled for the request. http_request is None if no http
            request is found.
    """
    otel_trace_id, otel_span_id, otel_trace_sampled = (
        _retrieve_current_open_telemetry_span()
    )

    # Defaults used when no framework reports an active request.
    http_request = None
    http_trace_id = None
    http_span_id = None
    http_trace_sampled = False

    # The first framework that yields a request wins; results that come
    # without a request are discarded.
    for fetch in (get_request_data_from_django, get_request_data_from_flask):
        found_request, found_trace_id, found_span_id, found_sampled = fetch()
        if found_request is not None:
            http_request = found_request
            http_trace_id = found_trace_id
            http_span_id = found_span_id
            http_trace_sampled = found_sampled
            break

    # A truthy otel_trace_id means the OpenTelemetry values take precedence.
    if not otel_trace_id:
        return http_request, http_trace_id, http_span_id, http_trace_sampled
    return http_request, otel_trace_id, otel_span_id, otel_trace_sampled