Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/extractattachments.py: 27%

37 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2Module that extracts attachments from notebooks into their own files 

3""" 

4 

5# Copyright (c) Jupyter Development Team. 

6# Distributed under the terms of the Modified BSD License. 

7 

8import os 

9from base64 import b64decode 

10 

11from traitlets import Bool, Unicode 

12 

13from .base import Preprocessor 

14 

15 

class ExtractAttachmentsPreprocessor(Preprocessor):
    """
    Extract cell attachments from a notebook into their own files.

    Every cell that carries an ``attachments`` entry is processed: each
    attachment is base64-decoded, stored in ``resources`` under a path-qualified
    filename, and the ``attachment:<name>`` references in the cell source are
    rewritten to point at that path.

    The attachments are stored either alongside the extracted outputs
    (``resources["outputs"]``, the default) or in a separate
    ``resources["attachments"]`` mapping when ``use_separate_dir`` is True.

    https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments
    """

    # Template for the attachment directory name; ``{notebook_name}`` is filled
    # in from ``resources["unique_key"]`` in ``preprocess``.
    attachments_directory_template = Unicode(
        "{notebook_name}_attachments",
        help="Directory to place attachments if use_separate_dir is True",
    ).tag(config=True)

    # Selects between the shared output directory and a dedicated one.
    use_separate_dir = Bool(
        False,
        help="Whether to use output_files_dir (which ExtractOutput also uses) or "
        "create a separate directory for attachments",
    ).tag(config=True)

    def __init__(self, **kw):
        """
        Public constructor.

        Keyword arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(**kw)
        # Directory prefix for extracted files. It depends on ``resources``,
        # so the real value is only assigned in ``preprocess``.
        self.path_name = ""
        # Key in ``resources`` under which extracted attachments are stored.
        # Defaulted here in case ``preprocess_cell`` is used without calling
        # ``preprocess`` first.
        self.resources_item_key = "attachments"

    def preprocess(self, nb, resources):
        """
        Determine the storage settings, then apply the preprocessor to ``nb``.

        Parameters
        ----------
        nb : notebook object
            Notebook being converted; passed through to the parent ``preprocess``.
        resources : dict-like
            Conversion resources. ``resources["unique_key"]`` is read when
            ``use_separate_dir`` is True, ``resources["output_files_dir"]``
            otherwise. The mapping that receives the extracted files is
            created here if it is missing or not a dict.

        Returns
        -------
        Whatever the parent ``preprocess`` returns for ``(nb, resources)``.
        """
        if self.use_separate_dir:
            self.path_name = self.attachments_directory_template.format(
                notebook_name=resources["unique_key"]
            )
            # Initialize dedicated resources for attachments
            resources["attachment_files_dir"] = self.path_name
            resources["attachments"] = {}
            self.resources_item_key = "attachments"
        else:
            # Share the directory and resources entry used for extracted outputs
            self.path_name = resources["output_files_dir"]
            self.resources_item_key = "outputs"

        # Make sure the target mapping exists. ``.get`` is used so that a plain
        # dict without the key gets the entry created instead of raising KeyError.
        if not isinstance(resources.get(self.resources_item_key), dict):
            resources[self.resources_item_key] = {}

        return super().preprocess(nb, resources)

    def preprocess_cell(self, cell, resources, index):
        """
        Extract the attachments of one cell and rewrite references to them.

        E.g.
        '![image.png](attachment:021fdd80.png)'
        becomes
        '![image.png]({path_name}/021fdd80.png)'

        Assumes ``self.path_name`` and ``self.resources_item_key`` are set
        properly (usually by ``preprocess``).

        Parameters
        ----------
        cell : notebook cell
            Cell to process; its ``source`` is modified in place.
        resources : dict-like
            Receives ``{path-qualified filename: decoded bytes}`` entries under
            ``resources[self.resources_item_key]``.
        index : int
            Position of the cell in the notebook (unused here).

        Returns
        -------
        tuple
            ``(cell, resources)``.
        """
        if "attachments" not in cell:
            return cell, resources

        for fname in cell.attachments:
            self.log.debug("Encountered attachment %s", fname)

            # An attachment is a bundle keyed by mimetype. No case with several
            # mimetypes under one filename is known, so only the first is read.
            bundle = cell.attachments[fname]
            mimetype = next(iter(bundle), None) if bundle else None
            if mimetype is None:
                # Nothing to decode: skip instead of failing on an unbound
                # variable or re-using the previous attachment's bytes.
                self.log.warning("Attachment %s has no data; skipping", fname)
                continue

            # Convert the base64 text to bytes and decode it
            decoded = b64decode(bundle[mimetype].encode("utf-8"))

            # The files writer wants the directory to be part of the filename.
            # NOTE(review): ``fname`` comes straight from the notebook and is not
            # sanitised here; a name containing ``..`` or an absolute path would
            # escape ``path_name`` -- confirm the writer guards against this.
            new_filename = os.path.join(self.path_name, fname)
            resources[self.resources_item_key][new_filename] = decoded

            # os.path.join on Windows uses "\\" as separator,
            # but files like markdown still want "/"
            if os.path.sep != "/":
                new_filename = new_filename.replace(os.path.sep, "/")

            # Point the reference in the cell source at the extracted file
            cell.source = cell.source.replace("attachment:" + fname, new_filename)

        return cell, resources