Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/extractattachments.py: 27%

1"""

2Module that extracts attachments from notebooks into their own files

3"""

5# Copyright (c) Jupyter Development Team.

6# Distributed under the terms of the Modified BSD License.

8import os

9from base64 import b64decode

11from traitlets import Bool, Unicode

13from .base import Preprocessor

class ExtractAttachmentsPreprocessor(Preprocessor):
    """
    Extracts attachments from all (markdown and raw) cells in a notebook.

    The extracted attachments are stored in a directory ('attachments' by default).
    https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments
    """

    # Template for the attachments directory name; ``{notebook_name}`` is
    # filled in from ``resources["unique_key"]`` in ``preprocess``.
    attachments_directory_template = Unicode(
        "{notebook_name}_attachments",
        help="Directory to place attachments if use_separate_dir is True",
    ).tag(config=True)

    # Selects between a dedicated attachments directory (True) and the
    # ``output_files_dir`` / ``"outputs"`` resources entries (False).
    use_separate_dir = Bool(
        False,
        help="Whether to use output_files_dir (which ExtractOutput also uses) or "
        "create a separate directory for attachments",
    ).tag(config=True)

    def __init__(self, **kw):
        """
        Public constructor

        Keyword arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(**kw)
        # Directory path prefix for extracted files; set in self.preprocess
        # because it needs resources.
        self.path_name = ""
        # Key in resources under which extracted attachments are stored.
        # Set here as a default, in case someone doesn't want to call preprocess.
        self.resources_item_key = "attachments"

    # TODO: add condition and configurability here
    def preprocess(self, nb, resources):
        """
        Determine some settings and apply preprocessor to notebook

        Sets ``self.path_name`` and ``self.resources_item_key`` according to
        ``use_separate_dir``, makes sure the target entry in ``resources`` is
        a dict, and then delegates to the parent ``preprocess``.

        Parameters
        ----------
        nb :
            Notebook being converted.
        resources : dict-like
            Conversion resources. Reads ``"unique_key"`` when
            ``use_separate_dir`` is True, otherwise ``"output_files_dir"``.

        Returns
        -------
        tuple
            The ``(nb, resources)`` pair returned by the parent ``preprocess``.
        """
        if self.use_separate_dir:
            self.path_name = self.attachments_directory_template.format(
                notebook_name=resources["unique_key"]
            )
            # Initialize resources for attachments
            resources["attachment_files_dir"] = self.path_name
            resources["attachments"] = {}
            self.resources_item_key = "attachments"
        else:
            # Use same resources as ExtractOutput
            self.path_name = resources["output_files_dir"]
            self.resources_item_key = "outputs"

        # Make sure key exists. ``.get`` is used so that a plain mapping
        # without the key gets an empty dict instead of raising KeyError.
        if not isinstance(resources.get(self.resources_item_key), dict):
            resources[self.resources_item_key] = {}

        # NOTE(review): the parent implementation is not visible here;
        # presumably it calls preprocess_cell for every cell -- confirm.
        nb, resources = super().preprocess(nb, resources)
        return nb, resources

    def preprocess_cell(self, cell, resources, index):
        """
        Extract attachments to individual files and
        change references to them.
        E.g.
        '![image.png](attachment:021fdd80.png)'
        becomes
        '![image.png]({path_name}/021fdd80.png)'
        Assumes self.path_name and self.resources_item_key is set properly (usually in preprocess).

        Parameters
        ----------
        cell :
            Notebook cell; only cells containing an ``"attachments"`` entry
            are modified.
        resources : dict-like
            Decoded attachment bytes are stored in
            ``resources[self.resources_item_key]``, keyed by file path.
        index : int
            Index of the cell; not used by this method.

        Returns
        -------
        tuple
            ``(cell, resources)``, both possibly modified in place.
        """
        if "attachments" not in cell:
            return cell, resources

        for fname in cell.attachments:
            self.log.debug("Encountered attachment %s", fname)

            mime_bundle = cell.attachments[fname]
            if not mime_bundle:
                # Nothing to decode. Without this guard the code below would
                # either fail on an unbound variable or reuse the bytes of
                # the previously processed attachment.
                self.log.warning("Attachment %s has no data, skipping", fname)
                continue

            # Add file for writer

            # Right now I don't know of a situation where there would be multiple
            # mime types under same filename, and I can't index into it without the mimetype.
            # So I only read the first one.
            first_mimetype = next(iter(mime_bundle))
            # convert to bytes and decode
            decoded = b64decode(mime_bundle[first_mimetype].encode("utf-8"))

            # FilesWriter wants path to be in attachment filename here
            # NOTE(review): fname comes from the notebook and is joined
            # verbatim; confirm whether absolute or ".." names need rejecting.
            new_filename = os.path.join(self.path_name, fname)
            resources[self.resources_item_key][new_filename] = decoded

            # Edit the reference to the attachment

            # os.path.join on windows uses "\\" separator,
            # but files like markdown still want "/"
            if os.path.sep != "/":
                new_filename = new_filename.replace(os.path.sep, "/")
            cell.source = cell.source.replace("attachment:" + fname, new_filename)

        return cell, resources