1"""Set of common tools to aid bundler implementations."""
2
3# Copyright (c) Jupyter Development Team.
4# Distributed under the terms of the Modified BSD License.
5import os
6import shutil
7import errno
8import nbformat
9import fnmatch
10import glob
11
def get_file_references(abs_nb_path, version):
    """Resolve every file referenced by a notebook to concrete filenames.

    References are read from Markdown fenced code blocks or from
    ``<!--associate: ... -->`` HTML comments in the notebook, and the
    patterns found there (gitignore-like syntax, see
    https://git-scm.com/docs/gitignore) are expanded against the directory
    that contains the notebook.

    Parameters
    ----------
    abs_nb_path: str
        Absolute path of the notebook on disk
    version: int
        Version of the notebook document format to use

    Returns
    -------
    set
        Filename strings relative to the notebook's directory, as produced
        by expand_references
    """
    notebook_dir = os.path.dirname(abs_nb_path)
    return expand_references(
        notebook_dir,
        get_reference_patterns(abs_nb_path, version),
    )
33
def get_reference_patterns(abs_nb_path, version):
    """Collect the raw reference patterns declared in a notebook.

    The notebook is read with nbformat and every cell is scanned with
    get_cell_reference_patterns; the per-cell results are concatenated in
    cell order.

    Parameters
    ----------
    abs_nb_path: str
        Absolute path of the notebook on disk
    version: int
        Version of the notebook document format to use

    Returns
    -------
    list
        Pattern strings from the notebook, unexpanded
    """
    patterns = []
    for cell in nbformat.read(abs_nb_path, version).cells:
        cell_patterns = get_cell_reference_patterns(cell)
        # Cells without references contribute nothing
        if cell_patterns:
            patterns.extend(cell_patterns)
    return patterns
57
def get_cell_reference_patterns(cell):
    '''
    Extracts the reference patterns declared in one notebook cell. Only
    Markdown cells are inspected. Two notations are recognised: a fenced
    code block anywhere in the cell, e.g.,

    ```
    some.csv
    foo/
    !foo/bar
    ```

    or an HTML comment that starts the cell source, e.g.,

    <!--associate:
    some.csv
    foo/
    !foo/bar
    -->

    The HTML comment form takes precedence when the source starts with it.
    Scanning stops at the first line that begins with the closing marker.
    Lines containing `../`, lines starting with `#`, and blank lines are
    dropped.

    Parameters
    ----------
    cell: dict
        Notebook cell object

    Returns
    -------
    list
        Reference patterns found in the cell
    '''
    if not cell.get('cell_type').startswith('markdown'):
        return []

    source = cell.get('source')
    comment_open = '<!--associate:'
    fence = '```'

    if source.startswith(comment_open):
        # invisible after execution: unrendered HTML comment
        body = source[len(comment_open):]
        terminator = '-->'
    else:
        fence_at = source.find(fence)
        if fence_at < 0:
            return []
        # visible after execution: rendered as a code element within a
        # pre element
        body = source[fence_at + len(fence):]
        terminator = fence

    patterns = []
    for line in body.splitlines():
        if line.startswith(terminator):
            break
        # Trying to go out of the current directory leads to trouble
        # when deploying; lines starting with '#' are comments
        if '../' in line or line.startswith('#'):
            continue
        # Skip blank references
        if line.strip():
            patterns.append(line)
    return patterns
113
def expand_references(root_path, references):
    """Expands a set of reference patterns by evaluating them against the
    given root directory. Patterns are expressed in a gitignore-like manner
    (https://git-scm.com/docs/gitignore):

    * a pattern without a directory separator is evaluated as a shell glob
      in the root directory
    * a pattern ending in a separator matches every file below that
      directory
    * a pattern containing exactly one `**` matches files whose relative
      path starts with the text before it and ends with the text after it
      (patterns with more than one `**` match nothing)
    * any other pattern is matched against the relative path with fnmatch
    * a leading `!` negates the pattern: its matches are removed from the
      result regardless of pattern order

    NOTE: Temporarily changes the current working directory while shell
    globs are evaluated. The previous directory is always restored.

    Parameters
    ----------
    root_path: str
        Assumed root directory for the patterns
    references: list
        Reference patterns from get_reference_patterns expressed with
        forward-slash directory separators

    Returns
    -------
    set
        Filename strings relative to the root path
    """
    # Use normpath to convert to platform specific slashes, but be sure
    # to retain a trailing slash which normpath pulls off
    normalized_references = []
    for ref in references:
        normalized_ref = os.path.normpath(ref)
        if ref.endswith('/'):
            normalized_ref += os.sep
        normalized_references.append(normalized_ref)

    globbed = set()
    negations = set()
    # Populated lazily: the tree is walked at most once, and only when a
    # pattern actually needs path matching
    relative_files = None

    for pattern in normalized_references:
        if not pattern:
            continue
        is_negation = pattern.startswith('!')
        testpattern = pattern[1:] if is_negation else pattern

        if os.sep not in pattern:
            # simple shell glob
            matches = _glob_in_directory(root_path, testpattern)
        else:
            if relative_files is None:
                relative_files = _list_relative_files(root_path)
            matches = [
                name for name in relative_files
                if _matches_path_pattern(name, testpattern)
            ]

        if is_negation:
            negations.update(matches)
        else:
            globbed.update(matches)

    # Set difference drops a negated file even when several positive
    # patterns matched it
    return globbed - negations


def _glob_in_directory(directory, pattern):
    """Evaluates a shell glob relative to directory, restoring the previous
    working directory even if globbing fails."""
    previous_cwd = os.getcwd()
    os.chdir(directory)
    try:
        return glob.glob(pattern)
    finally:
        os.chdir(previous_cwd)


def _list_relative_files(root_path):
    """Lists every file below root_path as a path relative to root_path."""
    relative_files = []
    for root, _, filenames in os.walk(root_path):
        for filename in filenames:
            # relpath stays correct when root_path has a trailing separator
            # or is the filesystem root, unlike slicing off a fixed prefix
            relative_files.append(
                os.path.relpath(os.path.join(root, filename), root_path)
            )
    return relative_files


def _matches_path_pattern(relative_name, testpattern):
    """Tests one relative filename against one non-negated path pattern."""
    if testpattern.endswith(os.sep):
        # directory pattern: everything beneath it matches
        return relative_name.startswith(testpattern)
    if '**' in testpattern:
        # path wildcard: only a single `**` is supported
        ends = testpattern.split('**')
        return (
            len(ends) == 2
            and relative_name.startswith(ends[0])
            and relative_name.endswith(ends[1])
        )
    # segments should be respected
    return fnmatch.fnmatch(relative_name, testpattern)
200
def copy_filelist(src, dst, src_relative_filenames):
    """Copies the given list of files, relative to src, into dst, creating
    directories along the way as needed (including dst itself) and ignoring
    existence errors. Skips any entries that are not existing regular files
    in src. Does not create empty directories from src in dst.

    Parameters
    ----------
    src: str
        Root of the source directory
    dst: str
        Root of the destination directory
    src_relative_filenames: list
        Filenames relative to src

    Raises
    ------
    OSError
        If a destination directory cannot be created for a reason other
        than already existing, or if the copy itself fails
    """
    for filename in src_relative_filenames:
        src_file = os.path.join(src, filename)
        # Only consider the file if it exists in src
        if not os.path.isfile(src_file):
            continue

        dst_file = os.path.join(dst, filename)
        # Make sure the directory receiving the file exists. This covers
        # dst itself, so top-level files can be copied into a destination
        # that has not been created yet.
        parent_dst = os.path.dirname(dst_file)
        if parent_dst:
            try:
                os.makedirs(parent_dst)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    # bare raise keeps the original traceback
                    raise
        shutil.copy2(src_file, dst_file)