1"""
2------------------------------------------------------------------------------------------------------------------------
3anonymizer.py
4Copyright (C) 2019-22 - NFStream Developers
5This file is part of NFStream, a Flexible Network Data Analysis Framework (https://www.nfstream.org/).
6NFStream is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public
7License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later
8version.
9NFStream is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
10of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
11You should have received a copy of the GNU Lesser General Public License along with NFStream.
12If not, see <http://www.gnu.org/licenses/>.
13------------------------------------------------------------------------------------------------------------------------
14"""
15
16from hashlib import blake2b
17import secrets
18
19
class NFAnonymizer(object):
    """
    NFAnonymizer: NFStream anonymization implementation.

    An anonymizer is instantiated each time to_csv or to_pandas is called, with a fresh random secret key (64 bytes).
    Each specified column is anonymized using the keyed blake2b algorithm (digest_size: 64 bytes), so the same input
    value maps to the same digest within one anonymizer instance but differs across instances.
    """

    __slots__ = ("_secret", "_cols_names", "_cols_index", "_enabled")

    def __init__(self, cols_names):
        """
        Create an anonymizer for the given columns.

        Args:
            cols_names: Sized iterable of flow attribute names to anonymize. An empty collection disables
                anonymization entirely.
        """
        self._secret = secrets.token_bytes(64)
        self._cols_names = cols_names
        # Column positions are resolved lazily from the first processed flow.
        self._cols_index = None
        self._enabled = len(cols_names) > 0

    def process(self, flow):
        """
        Return the values of ``flow`` with the configured columns replaced by their keyed blake2b hex digest.

        Args:
            flow: Object exposing ``keys()`` and ``values()``; ``keys()`` must support ``.index()`` and ``values()``
                must support item assignment (e.g. both return lists).

        Returns:
            The object returned by ``flow.values()``, with every non-None configured column overwritten in place by
            a 128-character hexadecimal digest. Returned unmodified when anonymization is disabled.
        """
        if not self._enabled:
            return flow.values()

        if self._cols_index is None:
            # First flow: resolve the position of every column to anonymize once, then reuse for later flows.
            keys = flow.keys()
            resolved = []
            for col_name in self._cols_names:
                try:
                    resolved.append(keys.index(col_name))
                except ValueError:
                    # Unknown columns are reported and skipped rather than aborting the export.
                    print(
                        "WARNING: NFlow do not have {} attribute. Skipping anonymization.".format(
                            col_name
                        )
                    )
            self._cols_index = resolved

        values = flow.values()
        for col_idx in self._cols_index:
            raw = values[col_idx]
            # None marks an absent value; keep it as-is instead of hashing the string "None".
            if raw is not None:
                values[col_idx] = blake2b(
                    str(raw).encode(), digest_size=64, key=self._secret
                ).hexdigest()
        return values