Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nfstream/anonymizer.py: 22%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

27 statements  

1""" 

2------------------------------------------------------------------------------------------------------------------------ 

3anonymizer.py 

4Copyright (C) 2019-22 - NFStream Developers 

5This file is part of NFStream, a Flexible Network Data Analysis Framework (https://www.nfstream.org/). 

6NFStream is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public 

7License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later 

8version. 

9NFStream is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty 

10of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 

11You should have received a copy of the GNU Lesser General Public License along with NFStream. 

12If not, see <http://www.gnu.org/licenses/>. 

13------------------------------------------------------------------------------------------------------------------------ 

14""" 

15 

16from hashlib import blake2b 

17import secrets 

18 

19 

class NFAnonymizer(object):
    """
    NFAnonymizer: NFStream anonymization implementation.
    A new anonymizer is instantiated each time to_csv or to_pandas is called, with a fresh random secret
    key (64 bytes). Each specified column is anonymized using the keyed blake2b algorithm
    (digest_size: 64 bytes), so values are replaced by their 128-character hexadecimal digest.
    """

    __slots__ = ("_secret", "_cols_names", "_cols_index", "_enabled")

    def __init__(self, cols_names):
        """
        Create an anonymizer for the given column names.

        :param cols_names: sized collection of flow attribute names to anonymize. An empty collection
            disables anonymization entirely.
        """
        # Random per-instance key: digests are not comparable across anonymizer instances.
        self._secret = secrets.token_bytes(64)
        self._cols_names = cols_names
        # Resolved lazily on the first processed flow (see process()).
        self._cols_index = None
        self._enabled = len(cols_names) > 0

    def process(self, flow):
        """
        Return the values of a flow, with the configured columns replaced by their keyed blake2b digest.

        :param flow: object exposing keys() and values(); keys() must support .index(name) and values()
            must support item assignment by position (presumably both return lists -- confirm against NFlow).
        :return: the sequence returned by flow.values(), modified in place for anonymized columns.
            None values are left untouched.
        """
        if not self._enabled:
            return flow.values()

        if self._cols_index is None:
            # First flow: resolve the positional indexes of the columns to anonymize, once.
            keys = flow.keys()
            self._cols_index = []
            for col_name in self._cols_names:
                try:
                    self._cols_index.append(keys.index(col_name))
                except ValueError:
                    # Unknown column: warn (naming the column) and skip it.
                    print(
                        "WARNING: NFlow do not have {} attribute. Skipping anonymization.".format(
                            col_name
                        )
                    )

        values = flow.values()
        for col_idx in self._cols_index:
            if values[col_idx] is not None:
                values[col_idx] = blake2b(
                    str(values[col_idx]).encode(), digest_size=64, key=self._secret
                ).hexdigest()
        return values