Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/extractattachments.py: 27%
37 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2Module that extracts attachments from notebooks into their own files
3"""
5# Copyright (c) Jupyter Development Team.
6# Distributed under the terms of the Modified BSD License.
8import os
9from base64 import b64decode
11from traitlets import Bool, Unicode
13from .base import Preprocessor
class ExtractAttachmentsPreprocessor(Preprocessor):
    """
    Extracts attachments from all cells of a notebook that carry an
    ``attachments`` entry (markdown and raw cells per the notebook format).

    The decoded attachment bytes are stored in ``resources`` keyed by their
    target file path, and references of the form ``attachment:<name>`` in the
    cell source are rewritten to point at that path.

    Depending on ``use_separate_dir`` the files are placed either in a
    dedicated directory named after ``attachments_directory_template`` or in
    ``resources["output_files_dir"]`` (shared with extracted outputs).

    https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments
    """

    attachments_directory_template = Unicode(
        "{notebook_name}_attachments",
        help="Directory to place attachments if use_separate_dir is True",
    ).tag(config=True)

    use_separate_dir = Bool(
        False,
        help="Whether to use output_files_dir (which ExtractOutput also uses) or "
        "create a separate directory for attachments",
    ).tag(config=True)

    def __init__(self, **kw):
        """
        Public constructor.

        Keyword arguments are forwarded unchanged to the base class.
        """
        super().__init__(**kw)
        # Directory path that extracted files are placed under.
        # Filled in by preprocess(), because it needs ``resources``.
        self.path_name = ""
        # Key in ``resources`` under which extracted attachments are stored.
        # Set here as a default, in case someone doesn't want to call preprocess.
        self.resources_item_key = "attachments"

    # Add condition and configurability here
    def preprocess(self, nb, resources):
        """
        Determine the target directory and resources key, then apply the
        preprocessor to every cell of the notebook.

        Parameters
        ----------
        nb
            Notebook being converted.
        resources
            Mapping of conversion resources. Must provide ``"unique_key"``
            when ``use_separate_dir`` is set, otherwise ``"output_files_dir"``.

        Returns
        -------
        tuple
            The ``(nb, resources)`` pair returned by the base class.
        """
        if self.use_separate_dir:
            self.path_name = self.attachments_directory_template.format(
                notebook_name=resources["unique_key"]
            )
            # Initialize resources for attachments
            resources["attachment_files_dir"] = self.path_name
            resources["attachments"] = {}
            self.resources_item_key = "attachments"
        else:
            # Use same resources as ExtractOutput
            self.path_name = resources["output_files_dir"]
            self.resources_item_key = "outputs"

        # Make sure key exists. ``.get`` is used so that a mapping which
        # really lacks the key is initialised instead of raising KeyError.
        if not isinstance(resources.get(self.resources_item_key), dict):
            resources[self.resources_item_key] = {}

        nb, resources = super().preprocess(nb, resources)
        return nb, resources

    def preprocess_cell(self, cell, resources, index):
        """
        Extract attachments to individual files and
        change references to them.

        Every occurrence of ``attachment:<fname>`` in the cell source is
        replaced with ``<path_name>/<fname>`` (always using ``/`` as the
        separator), and the decoded bytes are stored in
        ``resources[self.resources_item_key]`` under the OS-specific joined
        path.

        Assumes self.path_name and self.resources_item_key is set properly
        (usually in preprocess).

        Parameters
        ----------
        cell
            Notebook cell; only cells containing ``"attachments"`` are touched.
        resources
            Mapping of conversion resources, updated in place.
        index
            Position of the cell in the notebook (unused here).

        Returns
        -------
        tuple
            The ``(cell, resources)`` pair.
        """
        if "attachments" not in cell:
            return cell, resources

        for fname in cell.attachments:
            self.log.debug(f"Encountered attachment {fname}")

            # Right now I don't know of a situation where there would be multiple
            # mime types under same filename, and I can't index into it without
            # the mimetype. So only the first one is read.
            mime_bundle = cell.attachments[fname]
            first_mimetype = next(iter(mime_bundle), None)
            if first_mimetype is None:
                # An attachment without any content has nothing to extract.
                # Skipping it avoids an unbound ``decoded`` (or re-using the
                # bytes of the previous attachment under the wrong name).
                self.log.warning("Attachment %s has no content, skipping", fname)
                continue

            # convert to bytes and decode
            decoded = b64decode(mime_bundle[first_mimetype].encode("utf-8"))

            # FilesWriter wants path to be in attachment filename here
            # NOTE(review): fname comes straight from the notebook and
            # os.path.join does not sanitise it (absolute paths or ".."
            # components pass through) - confirm the writer guards against this.
            new_filename = os.path.join(self.path_name, fname)
            resources[self.resources_item_key][new_filename] = decoded

            # Edit the reference to the attachment.
            # os.path.join on windows uses "\\" separator,
            # but files like markdown still want "/"
            if os.path.sep != "/":
                new_filename = new_filename.replace(os.path.sep, "/")
            cell.source = cell.source.replace("attachment:" + fname, new_filename)

        return cell, resources