Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/patch.py: 12%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# patch.py -- For dealing with packed-style patches.
2# Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3#
4# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5# General Public License as published by the Free Software Foundation; version 2.0
6# or (at your option) any later version. You can redistribute it and/or
7# modify it under the terms of either of these two licenses.
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15# You should have received a copy of the licenses; if not, see
16# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18# License, Version 2.0.
19#
21"""Classes for dealing with git am-style patches.
23These patches are basically unified diffs with some extra metadata tacked
24on.
25"""
27import email.parser
28import time
29from difflib import SequenceMatcher
30from typing import BinaryIO, Optional, TextIO, Union
32from .objects import S_ISGITLINK, Blob, Commit
33from .pack import ObjectContainer
# Size of the leading slice of a blob that is_binary() scans for NUL bytes.
FIRST_FEW_BYTES = 8000
def _format_patch_date(timestamp, utc_offset):
    """Format a timestamp as an RFC 2822 style date with a numeric UTC offset.

    Args:
      timestamp: Seconds since the epoch
      utc_offset: Offset from UTC in seconds (may be negative)
    Returns: String such as "Thu, 15 Apr 2010 16:50:01 +0000"
    """
    sign = "-" if utc_offset < 0 else "+"
    hours, minutes = divmod(abs(utc_offset) // 60, 60)
    # Shift into the author's own zone and format with gmtime so the result
    # does not depend on the timezone of the machine writing the patch.
    stamp = time.strftime(
        "%a, %d %b %Y %H:%M:%S", time.gmtime(timestamp + utc_offset)
    )
    return f"{stamp} {sign}{hours:02d}{minutes:02d}"


def write_commit_patch(f, commit, contents, progress, version=None, encoding=None):
    """Write a single commit as a git-am style patch.

    Args:
      f: File-like object to write bytes to
      commit: Commit object
      contents: Diff contents, as bytes or str (str is encoded with
        ``encoding``)
      progress: Tuple with current patch number and total.
      version: Version string for the trailer; when None the Dulwich
        version is written instead
      encoding: Encoding for the text parts of the patch; defaults to
        ``f.encoding`` when that is set, otherwise "ascii"
    """
    # f.encoding may exist but be None, so fall back explicitly.
    encoding = encoding or getattr(f, "encoding", None) or "ascii"
    if isinstance(contents, str):
        contents = contents.encode(encoding)
    (num, total) = progress
    f.write(
        b"From "
        + commit.id
        + b" "
        + time.ctime(commit.commit_time).encode(encoding)
        + b"\n"
    )
    f.write(b"From: " + commit.author + b"\n")
    # Use the commit's own author date rather than the current wall-clock
    # time, so that writing the same commit twice yields the same patch.
    # Commit-like objects without author metadata fall back to commit_time/UTC.
    author_time = getattr(commit, "author_time", None)
    if author_time is None:
        author_time = commit.commit_time
    author_timezone = getattr(commit, "author_timezone", None) or 0
    f.write(
        b"Date: "
        + _format_patch_date(author_time, author_timezone).encode(encoding)
        + b"\n"
    )
    f.write(
        ("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding)
        + commit.message
        + b"\n"
    )
    f.write(b"\n")
    f.write(b"---\n")
    try:
        import subprocess

        p = subprocess.Popen(
            ["diffstat"], stdout=subprocess.PIPE, stdin=subprocess.PIPE
        )
    except (ImportError, OSError):
        pass  # diffstat not available?
    else:
        (diffstat, _) = p.communicate(contents)
        f.write(diffstat)
        f.write(b"\n")
    f.write(contents)
    f.write(b"-- \n")
    if version is None:
        from dulwich import __version__ as dulwich_version

        f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
    else:
        f.write(version.encode(encoding) + b"\n")
def get_summary(commit):
    """Determine the summary line for use in a filename.

    The summary is the first line of the commit message with spaces
    replaced by dashes. Undecodable bytes are substituted rather than
    raising.

    Args:
      commit: Commit
    Returns: Summary string; empty if the commit message has no lines
    """
    message_lines = commit.message.decode(errors="replace").splitlines()
    if not message_lines:
        # An empty message has no first line; avoid an IndexError.
        return ""
    return message_lines[0].replace(" ", "-")
103# Unified Diff
def _format_range_unified(start, stop):
    """Convert range to the "ed" format."""
    # Per the diff spec at http://www.unix.org/single_unix_specification/
    beginning = start + 1  # lines start numbering with one
    length = stop - start
    if length == 1:
        return f"{beginning}"
    if not length:
        beginning -= 1  # empty ranges begin at line just before the range
    return f"{beginning},{length}"


def unified_diff(
    a,
    b,
    fromfile="",
    tofile="",
    fromfiledate="",
    tofiledate="",
    n=3,
    lineterm="\n",
    tree_encoding="utf-8",
    output_encoding="utf-8",
):
    """difflib.unified_diff that can detect "No newline at end of file" as
    original "git diff" does.

    Based on the same function in Python2.7 difflib.py

    Args:
      a: Sequence of byte lines of the old content (line endings kept)
      b: Sequence of byte lines of the new content (line endings kept)
      fromfile: Old file label, as bytes (decoded with ``tree_encoding``)
        or str
      tofile: New file label, as bytes (decoded with ``tree_encoding``)
        or str
      fromfiledate: Optional date text appended to the old file header
      tofiledate: Optional date text appended to the new file header
      n: Number of context lines around each change
      lineterm: Terminator for the generated header and hunk lines
        (content lines are emitted as given)
      tree_encoding: Encoding used to decode bytes file labels
      output_encoding: Encoding used for the generated header and hunk lines
    Returns: Generator of byte strings; nothing is yielded when ``a`` and
      ``b`` do not differ
    """

    def label(name):
        # The "" defaults are str while callers in this module pass bytes,
        # so accept both instead of failing on str.decode.
        if isinstance(name, str):
            return name
        return name.decode(tree_encoding)

    def terminated(line):
        # A line lacking its newline can only be the last line of a file;
        # flag it the way "git diff" does so the output stays line-based.
        if line[-1:] == b"\n":
            return line
        return line + b"\n\\ No newline at end of file\n"

    started = False
    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            started = True
            fromdate = f"\t{fromfiledate}" if fromfiledate else ""
            todate = f"\t{tofiledate}" if tofiledate else ""
            yield f"--- {label(fromfile)}{fromdate}{lineterm}".encode(
                output_encoding
            )
            yield f"+++ {label(tofile)}{todate}{lineterm}".encode(output_encoding)

        first, last = group[0], group[-1]
        file1_range = _format_range_unified(first[1], last[2])
        file2_range = _format_range_unified(first[3], last[4])
        yield f"@@ -{file1_range} +{file2_range} @@{lineterm}".encode(output_encoding)

        for tag, i1, i2, j1, j2 in group:
            if tag == "equal":
                for line in a[i1:i2]:
                    yield b" " + terminated(line)
                continue
            if tag in ("replace", "delete"):
                for line in a[i1:i2]:
                    yield b"-" + terminated(line)
            if tag in ("replace", "insert"):
                for line in b[j1:j2]:
                    yield b"+" + terminated(line)
def is_binary(content):
    """Report whether content looks binary.

    Only the leading FIRST_FEW_BYTES bytes are inspected; a NUL byte in
    that prefix marks the content as binary.

    Args:
      content: Bytestring to check for binary content
    Returns: True if a NUL byte occurs in the inspected prefix
    """
    head = content[:FIRST_FEW_BYTES]
    return b"\0" in head
def shortid(hexsha):
    """Abbreviate a hex sha to its first seven characters.

    Args:
      hexsha: Hex sha as bytes, or None for a nonexistent object
    Returns: Seven-byte prefix of ``hexsha``, or seven zeros for None
    """
    return b"0" * 7 if hexsha is None else hexsha[:7]
def patch_filename(p, root):
    """Build the path shown in a diff header.

    Args:
      p: Path as bytes, or None for a nonexistent file
      root: Prefix to put in front of the path (e.g. b"a" or b"b")
    Returns: ``root + b"/" + p``, or b"/dev/null" when ``p`` is None
    """
    if p is not None:
        return root + b"/" + p
    return b"/dev/null"
def write_object_diff(f, store: ObjectContainer, old_file, new_file, diff_binary=False):
    """Write the diff between two versions of an object.

    The diff header is written first, followed either by a one-line
    "Binary files ... differ" notice or by a unified diff of the contents.

    Args:
      f: File-like object to write to
      store: Store to retrieve objects from, if necessary
      old_file: (path, mode, hexsha) tuple
      new_file: (path, mode, hexsha) tuple
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().

    Note: the tuple elements should be None for nonexistent files
    """
    old_path, old_mode, old_id = old_file
    new_path, new_mode, new_id = new_file
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")

    def load_blob(mode, hexsha):
        # Nonexistent side: diff against an empty blob.
        if hexsha is None:
            return Blob.from_string(b"")
        # Gitlinks are not looked up in the store; a synthetic one-line
        # blob naming the referenced commit is used instead.
        if S_ISGITLINK(mode):
            return Blob.from_string(b"Subproject commit " + hexsha + b"\n")
        return store[hexsha]

    def split_lines(blob):
        return blob.splitlines() if blob else []

    f.writelines(
        gen_diff_header((old_path, new_path), (old_mode, new_mode), (old_id, new_id))
    )
    old_blob = load_blob(old_mode, old_id)
    new_blob = load_blob(new_mode, new_id)
    treat_as_binary = not diff_binary and (
        is_binary(old_blob.data) or is_binary(new_blob.data)
    )
    if treat_as_binary:
        f.write(b"Binary files " + old_label + b" and " + new_label + b" differ\n")
        return
    f.writelines(
        unified_diff(
            split_lines(old_blob),
            split_lines(new_blob),
            old_label,
            new_label,
        )
    )
248# TODO(jelmer): Support writing unicode, rather than bytes.
def gen_diff_header(paths, modes, shas):
    """Generate the header lines of a blob diff.

    Yields the "diff --git" line, any mode change lines, and the "index"
    line (emitted in several pieces).

    Args:
      paths: Tuple with old and new path
      modes: Tuple with old and new modes
      shas: Tuple with old and new shas
    """
    old_path, new_path = paths
    old_mode, new_mode = modes
    old_sha, new_sha = shas
    # When only one side has a path, use it for both sides of the header.
    old_path = new_path if old_path is None else old_path
    new_path = old_path if new_path is None else new_path
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")
    yield b"diff --git " + old_label + b" " + new_label + b"\n"

    if old_mode != new_mode:
        if new_mode is None:
            yield (f"deleted file mode {old_mode:o}\n").encode("ascii")
        else:
            if old_mode is not None:
                yield (f"old file mode {old_mode:o}\n").encode("ascii")
            yield (f"new file mode {new_mode:o}\n").encode("ascii")
    yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
    # The mode is appended to the index line only when both sides exist.
    if not (new_mode is None or old_mode is None):
        yield (f" {new_mode:o}").encode("ascii")
    yield b"\n"
281# TODO(jelmer): Support writing unicode, rather than bytes.
def write_blob_diff(f, old_file, new_file):
    """Write the diff between two blobs.

    Args:
      f: File-like object to write to
      old_file: (path, mode, blob) tuple (None if nonexisting)
      new_file: (path, mode, blob) tuple (None if nonexisting)

    Note: The use of write_object_diff is recommended over this function.
    """
    old_path, old_mode, old_blob = old_file
    new_path, new_mode, new_blob = new_file
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")

    def split_lines(blob):
        return [] if blob is None else blob.splitlines()

    # A missing blob has no id attribute, so its sha is reported as None.
    header = gen_diff_header(
        (old_path, new_path),
        (old_mode, new_mode),
        (getattr(old_blob, "id", None), getattr(new_blob, "id", None)),
    )
    f.writelines(header)
    old_lines = split_lines(old_blob)
    new_lines = split_lines(new_blob)
    f.writelines(unified_diff(old_lines, new_lines, old_label, new_label))
def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
    """Write the diff between two trees.

    Every change reported by ``store.tree_changes`` is written with
    write_object_diff.

    Args:
      f: File-like object to write to.
      store: Object store providing ``tree_changes`` and the objects
      old_tree: Old tree id
      new_tree: New tree id
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().
    """
    for change in store.tree_changes(old_tree, new_tree):
        (old_path, new_path), (old_mode, new_mode), (old_sha, new_sha) = change
        old_file = (old_path, old_mode, old_sha)
        new_file = (new_path, new_mode, new_sha)
        write_object_diff(f, store, old_file, new_file, diff_binary=diff_binary)
def git_am_patch_split(f: Union[TextIO, BinaryIO], encoding: Optional[str] = None):
    """Parse a git-am-style patch and split it up into bits.

    The whole of ``f`` is read and parsed as an e-mail message, using the
    bytes or text parser depending on what ``f.read()`` returns.

    Args:
      f: File-like object to parse
      encoding: Encoding to use when creating Git objects; defaults to
        ``f.encoding`` when that is set, otherwise "ascii"
    Returns: Tuple with commit object, diff contents and git version
    """
    encoding = encoding or getattr(f, "encoding", None) or "ascii"
    raw = f.read()
    if isinstance(raw, bytes):
        msg = email.parser.BytesParser().parsebytes(raw)
    else:
        msg = email.parser.Parser().parsestr(raw)
    return parse_patch_message(msg, encoding)
def parse_patch_message(msg, encoding=None):
    """Extract a Commit object and patch from an e-mail message.

    The From header supplies author and committer, and the Subject header
    (minus any leading "[PATCH ...] " tag) supplies the first line of the
    commit message. The body up to a "---" line is appended to the commit
    message, the text up to a "-- " line is the diff, and the line after
    that is the version.

    Args:
      msg: An email message (email.message.Message)
      encoding: Encoding to use to encode Git commits; "ascii" when None
    Returns: Tuple with commit object, diff contents and git version
      (version is None when nothing follows the "-- " line)
    """
    # NOTE(review): assumes From and Subject headers are present and that the
    # payload is not multipart; otherwise the lookups below yield None.
    encoding = encoding or "ascii"
    c = Commit()
    sender = msg["from"].encode(encoding)
    c.author = sender
    c.committer = sender

    subject = msg["subject"]
    try:
        tag_start = subject.index("[PATCH")
        tag_end = subject.index("] ", tag_start)
    except ValueError:
        # No complete "[PATCH ...] " tag: keep the subject as it is.
        pass
    else:
        subject = subject[tag_end + 2 :]
    message_parts = [(subject.replace("\n", "") + "\n").encode(encoding)]

    body = msg.get_payload(decode=True)
    line_iter = iter(body.splitlines(True))

    first = True
    for line in line_iter:
        if line == b"---\n":
            break
        if first:
            if line.startswith(b"From: "):
                # A leading in-body "From: " line overrides the author.
                c.author = line[len(b"From: ") :].rstrip()
            else:
                # Separate the subject from the body with a blank line.
                message_parts.append(b"\n" + line)
            first = False
        else:
            message_parts.append(line)
    c.message = b"".join(message_parts)

    diff_parts = []
    for line in line_iter:
        if line == b"-- \n":
            break
        diff_parts.append(line)
    diff = b"".join(diff_parts)

    version = next(line_iter, None)
    if version is not None:
        version = version.rstrip(b"\n")
    return c, diff, version