Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/patch.py: 12%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

186 statements  

1# patch.py -- For dealing with packed-style patches. 

2# Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# 

4# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

5# General Public License as published by the Free Software Foundation; version 2.0 

6# or (at your option) any later version. You can redistribute it and/or 

7# modify it under the terms of either of these two licenses. 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# 

15# You should have received a copy of the licenses; if not, see 

16# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

17# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

18# License, Version 2.0. 

19# 

20 

21"""Classes for dealing with git am-style patches. 

22 

23These patches are basically unified diffs with some extra metadata tacked 

24on. 

25""" 

26 

27import email.parser 

28import time 

29from difflib import SequenceMatcher 

30from typing import BinaryIO, Optional, TextIO, Union 

31 

32from .objects import S_ISGITLINK, Blob, Commit 

33from .pack import ObjectContainer 

34 

35FIRST_FEW_BYTES = 8000 

36 

37 

def write_commit_patch(f, commit, contents, progress, version=None, encoding=None):
    """Write a single commit as a git-am-style patch.

    Nothing is returned; the patch is written to ``f``.

    Args:
      f: Binary file-like object to write to
      commit: Commit object (its ``id``, ``commit_time``, ``author`` and
        ``message`` are used)
      contents: Diff contents, as bytes or str; str is encoded with
        ``encoding``
      progress: Tuple with current patch number and total.
      version: Version string for the trailer; when None the Dulwich
        version is written instead
      encoding: Encoding for generated text; defaults to ``f.encoding``
        when that is set, otherwise "ascii"
    """
    # A file object may expose ``encoding = None``; without the final
    # fallback every ``.encode(encoding)`` below would raise TypeError.
    encoding = encoding or getattr(f, "encoding", None) or "ascii"
    if isinstance(contents, str):
        contents = contents.encode(encoding)
    (num, total) = progress
    f.write(
        b"From "
        + commit.id
        + b" "
        + time.ctime(commit.commit_time).encode(encoding)
        + b"\n"
    )
    f.write(b"From: " + commit.author + b"\n")
    # NOTE(review): strftime() without a time tuple formats the current
    # local time, not the commit's timestamp -- confirm this is intended.
    f.write(
        b"Date: " + time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n"
    )
    f.write(
        ("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding)
        + commit.message
        + b"\n"
    )
    f.write(b"\n")
    f.write(b"---\n")
    try:
        import subprocess

        p = subprocess.Popen(
            ["diffstat"], stdout=subprocess.PIPE, stdin=subprocess.PIPE
        )
    except (ImportError, OSError):
        pass  # diffstat not available; the summary is optional
    else:
        (diffstat, _) = p.communicate(contents)
        f.write(diffstat)
        f.write(b"\n")
    f.write(contents)
    f.write(b"-- \n")
    if version is None:
        from dulwich import __version__ as dulwich_version

        f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
    else:
        f.write(version.encode(encoding) + b"\n")

90 

91 

def get_summary(commit):
    """Determine the summary line for use in a filename.

    The summary is the first line of the commit message with spaces
    replaced by dashes.

    Args:
      commit: Commit
    Returns: Summary string; empty if the commit message has no lines
    """
    decoded = commit.message.decode(errors="replace")
    message_lines = decoded.splitlines()
    if not message_lines:
        # An empty message has no first line; indexing would raise IndexError.
        return ""
    return message_lines[0].replace(" ", "-")

101 

102 

103# Unified Diff 

def _format_range_unified(start, stop):
    """Convert a half-open line range to unified-diff hunk notation."""
    # Per the diff spec at http://www.unix.org/single_unix_specification/
    beginning = start + 1  # lines start numbering with one
    length = stop - start
    if length == 1:
        return f"{beginning}"
    if not length:
        beginning -= 1  # empty ranges begin at line just before the range
    return f"{beginning},{length}"


def unified_diff(
    a,
    b,
    fromfile="",
    tofile="",
    fromfiledate="",
    tofiledate="",
    n=3,
    lineterm="\n",
    tree_encoding="utf-8",
    output_encoding="utf-8",
):
    """difflib.unified_diff that can detect "No newline at end of file" as
    original "git diff" does.

    Based on the same function in Python2.7 difflib.py

    Args:
      a: List of byte lines of the old content
      b: List of byte lines of the new content
      fromfile: Old file name, as bytes (decoded with ``tree_encoding``)
        or str
      tofile: New file name, as bytes (decoded with ``tree_encoding``)
        or str
      fromfiledate: Optional date string appended to the "---" line
      tofiledate: Optional date string appended to the "+++" line
      n: Number of context lines
      lineterm: Terminator for the header and hunk-range lines
      tree_encoding: Encoding used to decode bytes file names
      output_encoding: Encoding of the generated header lines
    Returns: Generator of byte strings making up the diff
    """
    no_newline_marker = b"\n\\ No newline at end of file\n"

    def _name(path):
        # The defaults are str, which has no .decode(); only decode bytes.
        return path.decode(tree_encoding) if isinstance(path, bytes) else path

    def _terminated(line):
        # Any line lacking a trailing newline gets git's marker so that the
        # emitted patch stays newline-terminated.
        return line if line[-1:] == b"\n" else line + no_newline_marker

    started = False
    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, and only if there is a hunk.
            started = True
            fromdate = f"\t{fromfiledate}" if fromfiledate else ""
            todate = f"\t{tofiledate}" if tofiledate else ""
            yield f"--- {_name(fromfile)}{fromdate}{lineterm}".encode(
                output_encoding
            )
            yield f"+++ {_name(tofile)}{todate}{lineterm}".encode(output_encoding)

        first, last = group[0], group[-1]
        file1_range = _format_range_unified(first[1], last[2])
        file2_range = _format_range_unified(first[3], last[4])
        yield f"@@ -{file1_range} +{file2_range} @@{lineterm}".encode(output_encoding)

        for tag, i1, i2, j1, j2 in group:
            if tag == "equal":
                # A context line can also be an unterminated last line.
                for line in a[i1:i2]:
                    yield b" " + _terminated(line)
                continue
            if tag in ("replace", "delete"):
                for line in a[i1:i2]:
                    yield b"-" + _terminated(line)
            if tag in ("replace", "insert"):
                for line in b[j1:j2]:
                    yield b"+" + _terminated(line)

166 

167 

def is_binary(content):
    """Report whether content looks binary.

    Only the first FIRST_FEW_BYTES bytes are inspected; content counts as
    binary when that prefix contains a null byte.

    Args:
      content: Bytestring to check for binary content
    """
    head = content[:FIRST_FEW_BYTES]
    return b"\0" in head

175 

176 

def shortid(hexsha):
    """Return the first seven characters of a hex sha.

    A missing sha (None) is rendered as seven zeroes.
    """
    return b"0" * 7 if hexsha is None else hexsha[:7]

182 

183 

def patch_filename(p, root):
    """Build the path shown in a diff header.

    Args:
      p: Path as bytes, or None for a nonexistent file
      root: Prefix as bytes, joined to the path with a slash
    Returns: ``root/p``, or ``/dev/null`` when p is None
    """
    return b"/dev/null" if p is None else root + b"/" + p

189 

190 

def write_object_diff(f, store: ObjectContainer, old_file, new_file, diff_binary=False):
    """Write the diff between two objects to a file.

    Args:
      f: File-like object to write to
      store: Store to retrieve objects from, if necessary
      old_file: (path, mode, hexsha) tuple
      new_file: (path, mode, hexsha) tuple
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().

    Note: the tuple elements should be None for nonexistent files
    """
    old_path, old_mode, old_id = old_file
    new_path, new_mode, new_id = new_file
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")

    def load_blob(mode, hexsha):
        # A missing file diffs as an empty blob.
        if hexsha is None:
            return Blob.from_string(b"")
        # A gitlink is not fetched from the store; it is rendered as a
        # one-line blob naming the commit.
        if S_ISGITLINK(mode):
            return Blob.from_string(b"Subproject commit " + hexsha + b"\n")
        return store[hexsha]

    def split_lines(blob):
        return blob.splitlines() if blob else []

    header = gen_diff_header(
        (old_path, new_path), (old_mode, new_mode), (old_id, new_id)
    )
    f.writelines(header)
    old_blob = load_blob(old_mode, old_id)
    new_blob = load_blob(new_mode, new_id)

    looks_binary = not diff_binary and (
        is_binary(old_blob.data) or is_binary(new_blob.data)
    )
    if looks_binary:
        f.write(b"Binary files " + old_label + b" and " + new_label + b" differ\n")
        return

    f.writelines(
        unified_diff(
            split_lines(old_blob),
            split_lines(new_blob),
            old_label,
            new_label,
        )
    )

246 

247 

248# TODO(jelmer): Support writing unicode, rather than bytes. 

def gen_diff_header(paths, modes, shas):
    """Generate the header of a blob diff as byte chunks.

    Args:
      paths: Tuple with old and new path
      modes: Tuple with old and new modes
      shas: Tuple with old and new shas
    """
    old_path, new_path = paths
    old_mode, new_mode = modes
    old_sha, new_sha = shas

    # When only one side has a path, the "diff --git" line uses it for both.
    if old_path is None:
        old_path = new_path
    elif new_path is None:
        new_path = old_path
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")
    yield b"diff --git " + old_label + b" " + new_label + b"\n"

    if old_mode != new_mode:
        if new_mode is None:
            yield (f"deleted file mode {old_mode:o}\n").encode("ascii")
        else:
            if old_mode is not None:
                yield (f"old file mode {old_mode:o}\n").encode("ascii")
            yield (f"new file mode {new_mode:o}\n").encode("ascii")

    yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
    # The mode is appended to the index line only when both sides have one.
    if not (new_mode is None or old_mode is None):
        yield (f" {new_mode:o}").encode("ascii")
    yield b"\n"

279 

280 

281# TODO(jelmer): Support writing unicode, rather than bytes. 

def write_blob_diff(f, old_file, new_file):
    """Write the diff between two blobs to a file.

    Args:
      f: File-like object to write to
      old_file: (path, mode, blob) tuple (None if nonexisting)
      new_file: (path, mode, blob) tuple (None if nonexisting)

    Note: The use of write_object_diff is recommended over this function.
    """
    old_path, old_mode, old_blob = old_file
    new_path, new_mode, new_blob = new_file
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")

    def split_lines(blob):
        # A missing blob contributes no lines.
        return [] if blob is None else blob.splitlines()

    # The blob ids feed the "index" line; a blob without an ``id`` gives None.
    old_id = getattr(old_blob, "id", None)
    new_id = getattr(new_blob, "id", None)
    f.writelines(
        gen_diff_header((old_path, new_path), (old_mode, new_mode), (old_id, new_id))
    )
    f.writelines(
        unified_diff(
            split_lines(old_blob), split_lines(new_blob), old_label, new_label
        )
    )

315 

316 

def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
    """Write the diff between two trees to a file.

    Args:
      f: File-like object to write to.
      store: Object store; supplies tree_changes() and the objects to diff
      old_tree: Old tree id
      new_tree: New tree id
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().
    """
    for paths, modes, shas in store.tree_changes(old_tree, new_tree):
        (oldpath, newpath) = paths
        (oldmode, newmode) = modes
        (oldsha, newsha) = shas
        old_entry = (oldpath, oldmode, oldsha)
        new_entry = (newpath, newmode, newsha)
        write_object_diff(f, store, old_entry, new_entry, diff_binary=diff_binary)

336 

337 

def git_am_patch_split(f: Union[TextIO, BinaryIO], encoding: Optional[str] = None):
    """Read a git-am-style patch from a file and split it into its parts.

    Args:
      f: File-like object to parse; may yield bytes or str
      encoding: Encoding to use when creating Git objects; defaults to
        ``f.encoding`` when set, otherwise "ascii"
    Returns: Tuple with commit object, diff contents and git version
    """
    encoding = encoding or getattr(f, "encoding", "ascii") or "ascii"
    raw = f.read()
    # Choose the email parser that matches what the file handed back.
    if isinstance(raw, bytes):
        msg = email.parser.BytesParser().parsebytes(raw)
    else:
        msg = email.parser.Parser().parsestr(raw)
    return parse_patch_message(msg, encoding)

356 

357 

def parse_patch_message(msg, encoding=None):
    """Extract a Commit object and patch from an e-mail message.

    Args:
      msg: An email message (email.message.Message)
      encoding: Encoding to use to encode Git commits; "ascii" when None
    Returns: Tuple with commit object, diff contents and git version
      (None when no line follows the "-- " separator)
    """
    if encoding is None:
        # The declared default must be usable; str.encode(None) raises.
        encoding = "ascii"
    c = Commit()
    sender = msg["from"].encode(encoding)
    c.author = sender
    c.committer = sender

    # Strip a leading "[PATCH n/m] " tag from the subject. A tag with no
    # closing "] " leaves the subject untouched instead of raising.
    subject = msg["subject"]
    patch_tag_start = subject.find("[PATCH")
    if patch_tag_start != -1:
        close = subject.find("] ", patch_tag_start)
        if close != -1:
            subject = subject[close + 2 :]
    message_parts = [(subject.replace("\n", "") + "\n").encode(encoding)]

    body = msg.get_payload(decode=True)
    line_iter = iter(body.splitlines(True))

    # Everything up to the "---" line extends the commit message, except
    # that a "From: " first line overrides the author instead.
    first = True
    for line in line_iter:
        if line == b"---\n":
            break
        if first:
            first = False
            if line.startswith(b"From: "):
                c.author = line[len(b"From: ") :].rstrip()
            else:
                message_parts.append(b"\n" + line)
        else:
            message_parts.append(line)
    c.message = b"".join(message_parts)

    # Everything up to the "-- " line is the diff; parts are joined once
    # to avoid quadratic bytes concatenation.
    diff_parts = []
    for line in line_iter:
        if line == b"-- \n":
            break
        diff_parts.append(line)
    diff = b"".join(diff_parts)

    # The line after the separator, if any, is the version.
    try:
        version = next(line_iter).rstrip(b"\n")
    except StopIteration:
        version = None
    return c, diff, version