1# Copyright 2016 Google LLC All Rights Reserved. 
    2# 
    3# Licensed under the Apache License, Version 2.0 (the "License"); 
    4# you may not use this file except in compliance with the License. 
    5# You may obtain a copy of the License at 
    6# 
    7#     http://www.apache.org/licenses/LICENSE-2.0 
    8# 
    9# Unless required by applicable law or agreed to in writing, software 
    10# distributed under the License is distributed on an "AS IS" BASIS, 
    11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
    12# See the License for the specific language governing permissions and 
    13# limitations under the License. 
    14 
    15"""Helper functions for logging handlers.""" 
    16 
    17import math 
    18import json 
    19import re 
    20import warnings 
    21 
    22try: 
    23    import flask 
    24except ImportError:  # pragma: NO COVER 
    25    flask = None 
    26 
    27import opentelemetry.trace 
    28 
    29from google.cloud.logging_v2.handlers.middleware.request import _get_django_request 
    30 
# Keys used with a Django request's ``META`` mapping. Only the trace and
# user-agent keys are read by the code in this section of the module; the
# content-length, remote-addr and referer keys are not referenced here.
_DJANGO_CONTENT_LENGTH = "CONTENT_LENGTH"
_DJANGO_XCLOUD_TRACE_HEADER = "HTTP_X_CLOUD_TRACE_CONTEXT"
_DJANGO_TRACEPARENT = "HTTP_TRACEPARENT"
_DJANGO_USERAGENT_HEADER = "HTTP_USER_AGENT"
_DJANGO_REMOTE_ADDR_HEADER = "REMOTE_ADDR"
_DJANGO_REFERER_HEADER = "HTTP_REFERER"
# Names passed to ``flask.request.headers.get`` to find trace context.
_FLASK_XCLOUD_TRACE_HEADER = "X_CLOUD_TRACE_CONTEXT"
_FLASK_TRACEPARENT = "TRACEPARENT"
# Key read from ``flask.request.environ`` and from Django's ``request.META``
# to fill the "protocol" field of the http_request payload.
_PROTOCOL_HEADER = "SERVER_PROTOCOL"
    40 
    41 
    42def format_stackdriver_json(record, message): 
    43    """Helper to format a LogRecord in in Stackdriver fluentd format. 
    44 
    45    Returns: 
    46        str: JSON str to be written to the log file. 
    47 
    48    DEPRECATED:  use StructuredLogHandler to write formatted logs to standard out instead. 
    49    """ 
    50    subsecond, second = math.modf(record.created) 
    51 
    52    payload = { 
    53        "message": message, 
    54        "timestamp": {"seconds": int(second), "nanos": int(subsecond * 1e9)}, 
    55        "thread": record.thread, 
    56        "severity": record.levelname, 
    57    } 
    58    warnings.warn( 
    59        "format_stackdriver_json is deprecated. Use StructuredLogHandler instead.", 
    60        DeprecationWarning, 
    61    ) 
    62    return json.dumps(payload, ensure_ascii=False) 
    63 
    64 
    65def get_request_data_from_flask(): 
    66    """Get http_request and trace data from flask request headers. 
    67 
    68    Returns: 
    69        Tuple[Optional[dict], Optional[str], Optional[str], bool]: 
    70            Data related to the current http request, trace_id, span_id and trace_sampled 
    71            for the request. All fields will be None if a Flask request isn't found. 
    72    """ 
    73    if flask is None or not flask.request: 
    74        return None, None, None, False 
    75 
    76    # build http_request 
    77    http_request = { 
    78        "requestMethod": flask.request.method, 
    79        "requestUrl": flask.request.url, 
    80        "userAgent": flask.request.user_agent.string, 
    81        "protocol": flask.request.environ.get(_PROTOCOL_HEADER), 
    82    } 
    83 
    84    # find trace id and span id 
    85    # first check for w3c traceparent header 
    86    header = flask.request.headers.get(_FLASK_TRACEPARENT) 
    87    trace_id, span_id, trace_sampled = _parse_trace_parent(header) 
    88    if trace_id is None: 
    89        # traceparent not found. look for xcloud_trace_context header 
    90        header = flask.request.headers.get(_FLASK_XCLOUD_TRACE_HEADER) 
    91        trace_id, span_id, trace_sampled = _parse_xcloud_trace(header) 
    92 
    93    return http_request, trace_id, span_id, trace_sampled 
    94 
    95 
    96def get_request_data_from_django(): 
    97    """Get http_request and trace data from django request headers. 
    98 
    99    Returns: 
    100        Tuple[Optional[dict], Optional[str], Optional[str], bool]: 
    101            Data related to the current http request, trace_id, span_id, and trace_sampled 
    102            for the request. All fields will be None if a django request isn't found. 
    103    """ 
    104    request = _get_django_request() 
    105 
    106    if request is None: 
    107        return None, None, None, False 
    108 
    109    # Django can raise django.core.exceptions.DisallowedHost here for a 
    110    # malformed HTTP_HOST header. But we don't want to import Django modules. 
    111    try: 
    112        request_url = request.build_absolute_uri() 
    113    except Exception: 
    114        request_url = None 
    115 
    116    # build http_request 
    117    http_request = { 
    118        "requestMethod": request.method, 
    119        "requestUrl": request_url, 
    120        "userAgent": request.META.get(_DJANGO_USERAGENT_HEADER), 
    121        "protocol": request.META.get(_PROTOCOL_HEADER), 
    122    } 
    123 
    124    # find trace id and span id 
    125    # first check for w3c traceparent header 
    126    header = request.META.get(_DJANGO_TRACEPARENT) 
    127    trace_id, span_id, trace_sampled = _parse_trace_parent(header) 
    128    if trace_id is None: 
    129        # traceparent not found. look for xcloud_trace_context header 
    130        header = request.META.get(_DJANGO_XCLOUD_TRACE_HEADER) 
    131        trace_id, span_id, trace_sampled = _parse_xcloud_trace(header) 
    132 
    133    return http_request, trace_id, span_id, trace_sampled 
    134 
    135 
    136def _parse_trace_parent(header): 
    137    """Given a w3 traceparent header, extract the trace and span ids. 
    138    For more information see https://www.w3.org/TR/trace-context/ 
    139 
    140    Args: 
    141        header (str): the string extracted from the traceparent header 
    142            example: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01 
    143    Returns: 
    144        Tuple[Optional[dict], Optional[str], bool]: 
    145            The trace_id, span_id and trace_sampled extracted from the header 
    146            Each field will be None if header can't be parsed in expected format. 
    147    """ 
    148    trace_id = span_id = None 
    149    trace_sampled = False 
    150    # see https://www.w3.org/TR/trace-context/ for W3C traceparent format 
    151    if header: 
    152        try: 
    153            VERSION_PART = r"(?!ff)[a-f\d]{2}" 
    154            TRACE_ID_PART = r"(?![0]{32})[a-f\d]{32}" 
    155            PARENT_ID_PART = r"(?![0]{16})[a-f\d]{16}" 
    156            FLAGS_PART = r"[a-f\d]{2}" 
    157            regex = f"^\\s?({VERSION_PART})-({TRACE_ID_PART})-({PARENT_ID_PART})-({FLAGS_PART})(-.*)?\\s?$" 
    158            match = re.match(regex, header) 
    159            trace_id = match.group(2) 
    160            span_id = match.group(3) 
    161            # trace-flag component is an 8-bit bit field. Read as an int 
    162            int_flag = int(match.group(4), 16) 
    163            # trace sampled is set if the right-most bit in flag component is set 
    164            trace_sampled = bool(int_flag & 1) 
    165        except (IndexError, AttributeError): 
    166            # could not parse header as expected. Return None 
    167            pass 
    168    return trace_id, span_id, trace_sampled 
    169 
    170 
    171def _parse_xcloud_trace(header): 
    172    """Given an X_CLOUD_TRACE header, extract the trace and span ids. 
    173 
    174    Args: 
    175        header (str): the string extracted from the X_CLOUD_TRACE header 
    176    Returns: 
    177        Tuple[Optional[str], Optional[str], bool]: 
    178            The trace_id, span_id and trace_sampled extracted from the header 
    179            Each field will be None if not found. 
    180    """ 
    181    trace_id = span_id = None 
    182    trace_sampled = False 
    183 
    184    # As per the format described at https://cloud.google.com/trace/docs/trace-context#legacy-http-header 
    185    #    "X-Cloud-Trace-Context: TRACE_ID[/SPAN_ID][;o=OPTIONS]" 
    186    # for example: 
    187    #    "X-Cloud-Trace-Context: 105445aa7843bc8bf206b12000100000/1;o=1" 
    188    # 
    189    # We expect: 
    190    #   * trace_id (optional, 128-bit hex string):  "105445aa7843bc8bf206b12000100000" 
    191    #   * span_id (optional, 16-bit hex string):   "0000000000000001" (needs to be converted into 16 bit hex string) 
    192    #   * trace_sampled (optional, bool):              true 
    193    if header: 
    194        try: 
    195            regex = r"([\w-]+)?(\/?([\w-]+))?(;?o=(\d))?" 
    196            match = re.match(regex, header) 
    197            trace_id = match.group(1) 
    198            span_id = match.group(3) 
    199            trace_sampled = match.group(5) == "1" 
    200 
    201            # Convert the span ID to 16-bit hexadecimal instead of decimal 
    202            try: 
    203                span_id_int = int(span_id) 
    204                if span_id_int > 0 and span_id_int < 2**64: 
    205                    span_id = f"{span_id_int:016x}" 
    206                else: 
    207                    span_id = None 
    208            except (ValueError, TypeError): 
    209                span_id = None 
    210 
    211        except IndexError: 
    212            pass 
    213    return trace_id, span_id, trace_sampled 
    214 
    215 
    216def _retrieve_current_open_telemetry_span(): 
    217    """Helper to retrieve trace, span ID, and trace sampled information from the current 
    218    OpenTelemetry span. 
    219 
    220    Returns: 
    221        Tuple[Optional[str], Optional[str], bool]: 
    222            Data related to the current trace_id, span_id, and trace_sampled for the 
    223            current OpenTelemetry span. If a span is not found, return None/False for all 
    224            fields. 
    225    """ 
    226    span = opentelemetry.trace.get_current_span() 
    227    if span != opentelemetry.trace.span.INVALID_SPAN: 
    228        context = span.get_span_context() 
    229        trace_id = opentelemetry.trace.format_trace_id(context.trace_id) 
    230        span_id = opentelemetry.trace.format_span_id(context.span_id) 
    231        trace_sampled = context.trace_flags.sampled 
    232 
    233        return trace_id, span_id, trace_sampled 
    234 
    235    return None, None, False 
    236 
    237 
    238def get_request_data(): 
    239    """Helper to get http_request and trace data from supported web 
    240    frameworks (currently supported: Flask and Django), as well as OpenTelemetry. Attempts 
    241    to retrieve trace/spanID from OpenTelemetry first, before going to Traceparent then XCTC. 
    242    HTTP request data is taken from a supporting web framework (currently Flask or Django). 
    243    Because HTTP request data is decoupled from OpenTelemetry, it is possible to get as a 
    244    return value the HTTP request from the web framework of choice, and trace/span data from 
    245    OpenTelemetry, even if trace data is present in the HTTP request headers. 
    246 
    247    Returns: 
    248        Tuple[Optional[dict], Optional[str], Optional[str], bool]: 
    249            Data related to the current http request, trace_id, span_id, and trace_sampled 
    250            for the request. All fields will be None if a http request isn't found. 
    251    """ 
    252 
    253    ( 
    254        otel_trace_id, 
    255        otel_span_id, 
    256        otel_trace_sampled, 
    257    ) = _retrieve_current_open_telemetry_span() 
    258 
    259    # Get HTTP request data 
    260    checkers = ( 
    261        get_request_data_from_django, 
    262        get_request_data_from_flask, 
    263    ) 
    264 
    265    http_request, http_trace_id, http_span_id, http_trace_sampled = ( 
    266        None, 
    267        None, 
    268        None, 
    269        False, 
    270    ) 
    271 
    272    for checker in checkers: 
    273        http_request, http_trace_id, http_span_id, http_trace_sampled = checker() 
    274        if http_request is None: 
    275            http_trace_id, http_span_id, http_trace_sampled = None, None, False 
    276        else: 
    277            break 
    278 
    279    # otel_trace_id existing means the other return values are non-null 
    280    if otel_trace_id: 
    281        return http_request, otel_trace_id, otel_span_id, otel_trace_sampled 
    282    else: 
    283        return http_request, http_trace_id, http_span_id, http_trace_sampled