1""" 
    2<Program Name> 
    3  hash.py 
    4 
    5<Author> 
    6  Vladimir Diaz <vladimir.v.diaz@gmail.com> 
    7 
    8<Started> 
    9  February 28, 2012.  Based on a previous version of this module. 
    10 
    11<Copyright> 
    12  See LICENSE for licensing information. 
    13 
    14<Purpose> 
    15  Support secure hashing and message digests. Any hash-related routines that 
    16  securesystemslib requires should be located in this module.  Simplifying the 
    17  creation of digest objects, and providing a central location for hash 
    18  routines are the main goals of this module.  Support routines implemented 
    19  include functions to create digest objects given a filename or file object. 
  Only the standard hashlib library is always available; pyca/cryptography
  ('pyca_crypto') is additionally supported when it is installed.
    22""" 
    23 
    24import hashlib 
    25 
    26from securesystemslib import exceptions 
    27from securesystemslib.storage import FilesystemBackend 
    28 
# Size passed to each read() call when digest_fileobject() consumes a file.
DEFAULT_CHUNK_SIZE = 4096
# Default values for the 'algorithm' and 'hash_library' arguments of the
# digest functions defined in this module.
DEFAULT_HASH_ALGORITHM = "sha256"
DEFAULT_HASH_LIBRARY = "hashlib"
# Hash libraries accepted by digest().  "pyca_crypto" is appended further down
# when the pyca/cryptography imports succeed.
SUPPORTED_LIBRARIES = ["hashlib"]
    33 
    34 
    35# If `pyca_crypto` is installed, add it to supported libraries 
    36try: 
    37    import binascii 
    38 
    39    from cryptography.hazmat.backends import default_backend 
    40    from cryptography.hazmat.primitives import hashes as _pyca_hashes 
    41 
    42    # Dictionary of `pyca/cryptography` supported hash algorithms. 
    43    PYCA_DIGEST_OBJECTS_CACHE = { 
    44        "sha224": _pyca_hashes.SHA224, 
    45        "sha256": _pyca_hashes.SHA256, 
    46        "sha384": _pyca_hashes.SHA384, 
    47        "sha512": _pyca_hashes.SHA512, 
    48    } 
    49 
    50    SUPPORTED_LIBRARIES.append("pyca_crypto") 
    51 
    52    class PycaDiggestWrapper: 
    53        """ 
    54        <Purpose> 
    55          A wrapper around `cryptography.hazmat.primitives.hashes.Hash` which adds 
    56          additional methods to meet expected interface for digest objects: 
    57 
    58            digest_object.digest_size 
    59            digest_object.hexdigest() 
    60            digest_object.update('data') 
    61            digest_object.digest() 
    62 
    63        <Properties> 
    64          algorithm: 
    65            Specific for `cryptography.hazmat.primitives.hashes.Hash` object. 
    66 
    67          digest_size: 
    68            Returns original's object digest size. 
    69 
    70        <Methods> 
    71          digest(self) -> bytes: 
    72            Calls original's object `finalize` method and returns digest as bytes. 
    73            NOTE: `cryptography.hazmat.primitives.hashes.Hash` allows calling 
    74            `finalize` method just once on the same instance, so everytime `digest` 
    75            methods is called, we replace internal object (`_digest_obj`). 
    76 
    77          hexdigest(self) -> str: 
    78            Returns a string hex representation of digest. 
    79 
    80          update(self, data) -> None: 
    81            Updates digest object data by calling the original's object `update` 
    82            method. 
    83        """ 
    84 
    85        def __init__(self, digest_obj): 
    86            self._digest_obj = digest_obj 
    87 
    88        @property 
    89        def algorithm(self): 
    90            return self._digest_obj.algorithm 
    91 
    92        @property 
    93        def digest_size(self): 
    94            return self._digest_obj.algorithm.digest_size 
    95 
    96        def digest(self): 
    97            digest_obj_copy = self._digest_obj.copy() 
    98            digest = self._digest_obj.finalize() 
    99            self._digest_obj = digest_obj_copy 
    100            return digest 
    101 
    102        def hexdigest(self): 
    103            return binascii.hexlify(self.digest()).decode("utf-8") 
    104 
    105        def update(self, data): 
    106            self._digest_obj.update(data) 
    107 
    108except ImportError:  # pragma: no cover 
    109    pass 
    110 
    111 
    112def digest(algorithm=DEFAULT_HASH_ALGORITHM, hash_library=DEFAULT_HASH_LIBRARY): 
    113    """ 
    114    <Purpose> 
    115      Provide the caller with the ability to create digest objects without having 
    116      to worry about crypto library availability or which library to use.  The 
    117      caller also has the option of specifying which hash algorithm and/or 
    118      library to use. 
    119 
    120      # Creation of a digest object using defaults or by specifying hash 
    121      # algorithm and library. 
    122      digest_object = securesystemslib.hash.digest() 
    123      digest_object = securesystemslib.hash.digest('sha384') 
    124      digest_object = securesystemslib.hash.digest('sha256', 'hashlib') 
    125 
    126      # The expected interface for digest objects. 
    127      digest_object.digest_size 
    128      digest_object.hexdigest() 
    129      digest_object.update('data') 
    130      digest_object.digest() 
    131 
    132      # Added hash routines by this module. 
    133      digest_object = securesystemslib.hash.digest_fileobject(file_object) 
    134      digest_object = securesystemslib.hash.digest_filename(filename) 
    135 
    136    <Arguments> 
    137      algorithm: 
    138        The hash algorithm (e.g., 'sha256', 'sha512'). 
    139 
    140      hash_library: 
    141        The crypto library to use for the given hash algorithm (e.g., 'hashlib'). 
    142 
    143    <Exceptions> 
    144      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported 
    145      hashing algorithm is specified, or digest could not be generated with given 
    146      the algorithm. 
    147 
    148      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported 
    149      library was requested via 'hash_library'. 
    150 
    151    <Side Effects> 
    152      None. 
    153 
    154    <Returns> 
    155      Digest object 
    156 
    157      e.g. 
    158        hashlib.new(algorithm) or 
    159        PycaDiggestWrapper object 
    160    """ 
    161 
    162    # Was a hashlib digest object requested and is it supported? 
    163    # If so, return the digest object. 
    164    if hash_library == "hashlib" and hash_library in SUPPORTED_LIBRARIES: 
    165        try: 
    166            if algorithm == "blake2b-256": 
    167                return hashlib.new("blake2b", digest_size=32) 
    168            else: 
    169                return hashlib.new(algorithm) 
    170 
    171        except (ValueError, TypeError): 
    172            # ValueError: the algorithm value was unknown 
    173            # TypeError: unexpected argument digest_size (on old python) 
    174            raise exceptions.UnsupportedAlgorithmError(algorithm) 
    175 
    176    # Was a pyca_crypto digest object requested and is it supported? 
    177    elif hash_library == "pyca_crypto" and hash_library in SUPPORTED_LIBRARIES: 
    178        try: 
    179            hash_algorithm = PYCA_DIGEST_OBJECTS_CACHE[algorithm]() 
    180            return PycaDiggestWrapper( 
    181                _pyca_hashes.Hash(hash_algorithm, default_backend()) 
    182            ) 
    183 
    184        except KeyError: 
    185            raise exceptions.UnsupportedAlgorithmError(algorithm) 
    186 
    187    # The requested hash library is not supported. 
    188    else: 
    189        raise exceptions.UnsupportedLibraryError( 
    190            "Unsupported" 
    191            " library requested.  Supported hash" 
    192            " libraries: " + repr(SUPPORTED_LIBRARIES) 
    193        ) 
    194 
    195 
    196def digest_fileobject( 
    197    file_object, 
    198    algorithm=DEFAULT_HASH_ALGORITHM, 
    199    hash_library=DEFAULT_HASH_LIBRARY, 
    200    normalize_line_endings=False, 
    201): 
    202    """ 
    203    <Purpose> 
    204      Generate a digest object given a file object.  The new digest object 
    205      is updated with the contents of 'file_object' prior to returning the 
    206      object to the caller. 
    207 
    208    <Arguments> 
    209      file_object: 
    210        File object whose contents will be used as the data 
    211        to update the hash of a digest object to be returned. 
    212 
    213      algorithm: 
    214        The hash algorithm (e.g., 'sha256', 'sha512'). 
    215 
    216      hash_library: 
    217        The library providing the hash algorithms (e.g., 'hashlib'). 
    218 
    219      normalize_line_endings: (default False) 
    220        Whether or not to normalize line endings for cross-platform support. 
    221        Note that this results in ambiguous hashes (e.g. 'abc\n' and 'abc\r\n' 
    222        will produce the same hash), so be careful to only apply this to text 
    223        files (not binary), when that equivalence is desirable and cannot result 
    224        in easily-maliciously-corrupted files producing the same hash as a valid 
    225        file. 
    226 
    227    <Exceptions> 
    228      securesystemslib.exceptions.FormatError, if the arguments are 
    229      improperly formatted. 
    230 
    231      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported 
    232      hashing algorithm was specified via 'algorithm'. 
    233 
    234      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported 
    235      crypto library was specified via 'hash_library'. 
    236 
    237    <Side Effects> 
    238      None. 
    239 
    240    <Returns> 
    241      Digest object 
    242 
    243      e.g. 
    244        hashlib.new(algorithm) or 
    245        PycaDiggestWrapper object 
    246    """ 
    247    # Digest object returned whose hash will be updated using 'file_object'. 
    248    # digest() raises: 
    249    # securesystemslib.exceptions.UnsupportedAlgorithmError 
    250    # securesystemslib.exceptions.UnsupportedLibraryError 
    251    digest_object = digest(algorithm, hash_library) 
    252 
    253    # Defensively seek to beginning, as there's no case where we don't 
    254    # intend to start from the beginning of the file. 
    255    file_object.seek(0) 
    256 
    257    # Read the contents of the file object in at most 4096-byte chunks. 
    258    # Update the hash with the data read from each chunk and return after 
    259    # the entire file is processed. 
    260    while True: 
    261        data = file_object.read(DEFAULT_CHUNK_SIZE) 
    262        if not data: 
    263            break 
    264 
    265        if normalize_line_endings: 
    266            while data[-1:] == b"\r": 
    267                c = file_object.read(1) 
    268                if not c: 
    269                    break 
    270 
    271                data += c 
    272 
    273            data = ( 
    274                data 
    275                # First Windows 
    276                .replace(b"\r\n", b"\n") 
    277                # Then Mac 
    278                .replace(b"\r", b"\n") 
    279            ) 
    280 
    281        if not isinstance(data, bytes): 
    282            digest_object.update(data.encode("utf-8")) 
    283 
    284        else: 
    285            digest_object.update(data) 
    286 
    287    return digest_object 
    288 
    289 
    290def digest_filename( 
    291    filename, 
    292    algorithm=DEFAULT_HASH_ALGORITHM, 
    293    hash_library=DEFAULT_HASH_LIBRARY, 
    294    normalize_line_endings=False, 
    295    storage_backend=None, 
    296): 
    297    """ 
    298    <Purpose> 
    299      Generate a digest object, update its hash using a file object 
    300      specified by filename, and then return it to the caller. 
    301 
    302    <Arguments> 
    303      filename: 
    304        The filename belonging to the file object to be used. 
    305 
    306      algorithm: 
    307        The hash algorithm (e.g., 'sha256', 'sha512'). 
    308 
    309      hash_library: 
    310        The library providing the hash algorithms (e.g., 'hashlib'). 
    311 
    312      normalize_line_endings: 
    313        Whether or not to normalize line endings for cross-platform support. 
    314 
    315      storage_backend: 
    316        An object which implements 
    317        securesystemslib.storage.StorageBackendInterface. When no object is 
    318        passed a FilesystemBackend will be instantiated and used. 
    319 
    320    <Exceptions> 
    321      securesystemslib.exceptions.UnsupportedAlgorithmError, if the given 
    322      'algorithm' is unsupported. 
    323 
    324      securesystemslib.exceptions.UnsupportedLibraryError, if the given 
    325      'hash_library' is unsupported. 
    326 
    327      securesystemslib.exceptions.StorageError, if the file cannot be opened. 
    328 
    329    <Side Effects> 
    330      None. 
    331 
    332    <Returns> 
    333      Digest object 
    334 
    335      e.g. 
    336        hashlib.new(algorithm) or 
    337        PycaDiggestWrapper object 
    338    """ 
    339    digest_object = None 
    340 
    341    if storage_backend is None: 
    342        storage_backend = FilesystemBackend() 
    343 
    344    # Open 'filename' in read+binary mode. 
    345    with storage_backend.get(filename) as file_object: 
    346        # Create digest_object and update its hash data from file_object. 
    347        # digest_fileobject() raises: 
    348        # securesystemslib.exceptions.UnsupportedAlgorithmError 
    349        # securesystemslib.exceptions.UnsupportedLibraryError 
    350        digest_object = digest_fileobject( 
    351            file_object, algorithm, hash_library, normalize_line_endings 
    352        ) 
    353 
    354    return digest_object