Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/patch.py: 12%

1# patch.py -- For dealing with packed-style patches.

4# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU

5# General Public License as published by the Free Software Foundation; version 2.0

6# or (at your option) any later version. You can redistribute it and/or

7# modify it under the terms of either of these two licenses.

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14#

15# You should have received a copy of the licenses; if not, see

16# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License

17# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache

18# License, Version 2.0.

19#

21"""Classes for dealing with git am-style patches.

23These patches are basically unified diffs with some extra metadata tacked

24on.

25"""

27import email.parser

28import time

29from difflib import SequenceMatcher

30from typing import BinaryIO, Optional, TextIO, Union

32from .objects import S_ISGITLINK, Blob, Commit

33from .pack import ObjectContainer

35FIRST_FEW_BYTES = 8000

def write_commit_patch(f, commit, contents, progress, version=None, encoding=None):
    """Write a git-am-style patch for a single commit.

    Args:
      f: Binary file-like object to write the patch to
      commit: Commit object
      contents: Diff contents, as bytes or str (a str is encoded using
        ``encoding``)
      progress: Tuple with current patch number and total.
      version: Version string written in the trailer; when None the
        Dulwich version is written instead
      encoding: Encoding for the text parts of the patch; defaults to
        ``f.encoding`` when that is set, otherwise ASCII
    """
    # f.encoding can be absent *or* None; fall back to ASCII in both cases
    # so the .encode() calls below never receive None.
    encoding = encoding or getattr(f, "encoding", None) or "ascii"
    if isinstance(contents, str):
        contents = contents.encode(encoding)
    (num, total) = progress
    f.write(
        b"From "
        + commit.id
        + b" "
        + time.ctime(commit.commit_time).encode(encoding)
        + b"\n"
    )
    f.write(b"From: " + commit.author + b"\n")
    # NOTE(review): this is the time at which the patch is written, not a
    # timestamp taken from the commit -- confirm that this is intended.
    f.write(
        b"Date: " + time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n"
    )
    f.write(
        ("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding)
        + commit.message
        + b"\n"
    )
    f.write(b"\n")
    f.write(b"---\n")
    # The diffstat summary is best-effort: it is skipped when the external
    # "diffstat" program cannot be started.
    try:
        import subprocess

        p = subprocess.Popen(
            ["diffstat"], stdout=subprocess.PIPE, stdin=subprocess.PIPE
        )
    except (ImportError, OSError):
        pass  # diffstat not available?
    else:
        (diffstat, _) = p.communicate(contents)
        f.write(diffstat)
        f.write(b"\n")
    f.write(contents)
    f.write(b"-- \n")
    if version is None:
        from dulwich import __version__ as dulwich_version

        f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
    else:
        f.write(version.encode(encoding) + b"\n")

def get_summary(commit):
    """Determine the summary line for use in a filename.

    Args:
      commit: Commit
    Returns: First line of the commit message with spaces replaced by
      dashes, or an empty string when the message has no lines
    """
    decoded = commit.message.decode(errors="replace")
    message_lines = decoded.splitlines()
    if not message_lines:
        # An empty message has no first line to index.
        return ""
    return message_lines[0].replace(" ", "-")

101

102

103# Unified Diff

104def _format_range_unified(start, stop):

105 """Convert range to the "ed" format."""

106 # Per the diff spec at http://www.unix.org/single_unix_specification/

107 beginning = start + 1 # lines start numbering with one

108 length = stop - start

109 if length == 1:

110 return f"{beginning}"

111 if not length:

112 beginning -= 1 # empty ranges begin at line just before the range

113 return f"{beginning},{length}"

114

115

def unified_diff(
    a,
    b,
    fromfile="",
    tofile="",
    fromfiledate="",
    tofiledate="",
    n=3,
    lineterm="\n",
    tree_encoding="utf-8",
    output_encoding="utf-8",
):
    """Generate a unified diff that marks missing trailing newlines.

    A difflib.unified_diff that can detect "No newline at end of file" as
    original "git diff" does. Based on the same function in Python2.7
    difflib.py

    Args:
      a: List of lines (bytes) of the old content
      b: List of lines (bytes) of the new content
      fromfile: Old file name, as bytes (decoded with ``tree_encoding``)
        or str
      tofile: New file name, as bytes (decoded with ``tree_encoding``)
        or str
      fromfiledate: Optional date string appended to the "---" line
      tofiledate: Optional date string appended to the "+++" line
      n: Number of context lines
      lineterm: Terminator for the header and hunk-marker lines
      tree_encoding: Encoding used to decode bytes file names
      output_encoding: Encoding used for the generated header lines
    Returns: Iterator over the lines of the diff, as bytes
    """
    no_newline_marker = b"\n\\ No newline at end of file\n"

    def as_text(name):
        # The defaults are str while callers typically pass bytes paths;
        # accept both rather than failing on str.decode.
        return name.decode(tree_encoding) if isinstance(name, bytes) else name

    def terminated(line):
        # Only the final line of a file can lack a newline; flag it so the
        # hunk stays well-formed.
        return line if line[-1:] == b"\n" else line + no_newline_marker

    started = False
    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, and only if there is at
            # least one hunk.
            started = True
            fromdate = f"\t{fromfiledate}" if fromfiledate else ""
            todate = f"\t{tofiledate}" if tofiledate else ""
            yield f"--- {as_text(fromfile)}{fromdate}{lineterm}".encode(
                output_encoding
            )
            yield f"+++ {as_text(tofile)}{todate}{lineterm}".encode(
                output_encoding
            )

        first, last = group[0], group[-1]
        file1_range = _format_range_unified(first[1], last[2])
        file2_range = _format_range_unified(first[3], last[4])
        yield f"@@ -{file1_range} +{file2_range} @@{lineterm}".encode(output_encoding)

        for tag, i1, i2, j1, j2 in group:
            if tag == "equal":
                for line in a[i1:i2]:
                    yield b" " + terminated(line)
                continue
            if tag in ("replace", "delete"):
                for line in a[i1:i2]:
                    yield b"-" + terminated(line)
            if tag in ("replace", "insert"):
                for line in b[j1:j2]:
                    yield b"+" + terminated(line)

166

167

def is_binary(content):
    """Report whether content looks binary.

    Only the first FIRST_FEW_BYTES bytes are inspected; a null byte among
    them marks the content as binary.

    Args:
      content: Bytestring to check for binary content
    Returns: True if a null byte was found, False otherwise
    """
    head = content[:FIRST_FEW_BYTES]
    return b"\0" in head

175

176

def shortid(hexsha):
    """Abbreviate a hex SHA to its first seven characters.

    Args:
      hexsha: Hex SHA as bytes, or None for a nonexistent object
    Returns: The first seven bytes of ``hexsha``, or seven ``0`` bytes
      when ``hexsha`` is None
    """
    return b"0" * 7 if hexsha is None else hexsha[:7]

182

183

def patch_filename(p, root):
    """Build the file name shown in a patch for one side of a diff.

    Args:
      p: Path as bytes, or None for a nonexistent file
      root: Prefix to put in front of the path, as bytes
    Returns: ``root + b"/" + p``, or ``b"/dev/null"`` when ``p`` is None
    """
    if p is not None:
        return root + b"/" + p
    return b"/dev/null"

189

190

def write_object_diff(f, store: ObjectContainer, old_file, new_file, diff_binary=False):
    """Write the diff between two versions of an object to a file.

    The diff header is written first, followed by either a one-line
    "Binary files ... differ" notice or a unified diff of the contents.

    Args:
      f: File-like object to write to
      store: Store to retrieve objects from, if necessary
      old_file: (path, mode, hexsha) tuple
      new_file: (path, mode, hexsha) tuple
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().

    Note: the tuple elements should be None for nonexistent files
    """
    old_path, old_mode, old_id = old_file
    new_path, new_mode, new_id = new_file
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")

    def load_blob(mode, hexsha):
        # A missing object is diffed as empty content.
        if hexsha is None:
            return Blob.from_string(b"")
        # A gitlink is not looked up in the store; it is rendered as a
        # one-line description of the commit it points at.
        if S_ISGITLINK(mode):
            return Blob.from_string(b"Subproject commit " + hexsha + b"\n")
        return store[hexsha]

    def split_lines(blob):
        return blob.splitlines() if blob else []

    header = gen_diff_header(
        (old_path, new_path), (old_mode, new_mode), (old_id, new_id)
    )
    f.writelines(header)
    old_blob = load_blob(old_mode, old_id)
    new_blob = load_blob(new_mode, new_id)

    treat_as_binary = not diff_binary and (
        is_binary(old_blob.data) or is_binary(new_blob.data)
    )
    if treat_as_binary:
        f.write(b"Binary files " + old_label + b" and " + new_label + b" differ\n")
        return

    f.writelines(
        unified_diff(
            split_lines(old_blob),
            split_lines(new_blob),
            old_label,
            new_label,
        )
    )

246

247

248# TODO(jelmer): Support writing unicode, rather than bytes.

def gen_diff_header(paths, modes, shas):
    """Generate the header lines of a blob diff.

    Args:
      paths: Tuple with old and new path
      modes: Tuple with old and new modes
      shas: Tuple with old and new shas
    Returns: Iterator over bytes chunks making up the "diff --git" line,
      any file mode lines and the "index" line
    """
    old_path, new_path = paths
    old_mode, new_mode = modes
    old_sha, new_sha = shas

    # When only one side has a path, that path is used for both sides.
    old_path, new_path = (
        new_path if old_path is None else old_path,
        old_path if new_path is None else new_path,
    )
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")
    yield b"diff --git " + old_label + b" " + new_label + b"\n"

    if old_mode != new_mode:
        if new_mode is None:
            yield "deleted file mode {:o}\n".format(old_mode).encode("ascii")
        else:
            if old_mode is not None:
                yield "old file mode {:o}\n".format(old_mode).encode("ascii")
            yield "new file mode {:o}\n".format(new_mode).encode("ascii")

    yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
    # The mode is appended to the index line only when both sides exist.
    if old_mode is not None and new_mode is not None:
        yield " {:o}".format(new_mode).encode("ascii")
    yield b"\n"

279

280

281# TODO(jelmer): Support writing unicode, rather than bytes.

def write_blob_diff(f, old_file, new_file):
    """Write the diff between two blobs to a file.

    Args:
      f: File-like object to write to
      old_file: (path, mode, blob) tuple (elements None if nonexisting)
      new_file: (path, mode, blob) tuple (elements None if nonexisting)

    Note: The use of write_object_diff is recommended over this function.
    """
    old_path, old_mode, old_blob = old_file
    new_path, new_mode, new_blob = new_file
    old_label = patch_filename(old_path, b"a")
    new_label = patch_filename(new_path, b"b")

    def split_lines(blob):
        return [] if blob is None else blob.splitlines()

    # A missing blob has no "id" attribute, so its sha is passed as None.
    header = gen_diff_header(
        (old_path, new_path),
        (old_mode, new_mode),
        (getattr(old_blob, "id", None), getattr(new_blob, "id", None)),
    )
    f.writelines(header)

    old_lines = split_lines(old_blob)
    new_lines = split_lines(new_blob)
    f.writelines(unified_diff(old_lines, new_lines, old_label, new_label))

315

316

def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
    """Write the diff between two trees to a file.

    Args:
      f: File-like object to write to.
      store: Object store providing ``tree_changes`` and the objects
        referenced by the trees
      old_tree: Old tree id
      new_tree: New tree id
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().
    """
    for paths, modes, shas in store.tree_changes(old_tree, new_tree):
        (old_path, new_path) = paths
        (old_mode, new_mode) = modes
        (old_sha, new_sha) = shas
        old_entry = (old_path, old_mode, old_sha)
        new_entry = (new_path, new_mode, new_sha)
        write_object_diff(f, store, old_entry, new_entry, diff_binary=diff_binary)

336

337

def git_am_patch_split(f: Union[TextIO, BinaryIO], encoding: Optional[str] = None):
    """Read a git-am-style patch from a file and split it into its parts.

    Args:
      f: File-like object to parse
      encoding: Encoding to use when creating Git objects; defaults to
        ``f.encoding`` when that is set, otherwise ASCII
    Returns: Tuple with commit object, diff contents and git version
    """
    # f.encoding may be missing or None, hence the second fallback.
    encoding = encoding or getattr(f, "encoding", "ascii") or "ascii"
    raw = f.read()
    # Choose the parser matching what the file object returned.
    if isinstance(raw, bytes):
        msg = email.parser.BytesParser().parsebytes(raw)
    else:
        msg = email.parser.Parser().parsestr(raw)
    return parse_patch_message(msg, encoding)

356

357

def parse_patch_message(msg, encoding=None):
    """Extract a Commit object and patch from an e-mail message.

    The body is read in three parts: the commit message up to a "---"
    line, the diff up to a "-- " line, and then a single version line.

    Args:
      msg: An email message (email.message.Message)
      encoding: Encoding to use to encode Git commits; defaults to ASCII
    Returns: Tuple with commit object, diff contents and git version
      (None when no line follows the "-- " separator)
    """
    # The declared default is None, which str.encode() rejects.
    encoding = encoding or "ascii"
    c = Commit()
    c.author = msg["from"].encode(encoding)
    c.committer = msg["from"].encode(encoding)

    subject = msg["subject"]
    try:
        patch_tag_start = subject.index("[PATCH")
        close = subject.index("] ", patch_tag_start)
    except ValueError:
        # No "[PATCH ...] " prefix, or the tag is never closed: keep the
        # whole subject instead of failing.
        pass
    else:
        subject = subject[close + 2 :]
    # Pieces are collected in lists and joined once, avoiding repeated
    # bytes concatenation.
    message_parts = [(subject.replace("\n", "") + "\n").encode(encoding)]

    body = msg.get_payload(decode=True)
    line_iter = iter(body.splitlines(True))

    first = True
    for line in line_iter:
        if line == b"---\n":
            break
        if first:
            # A leading "From: " line in the body overrides the author
            # taken from the mail header and is not part of the message.
            if line.startswith(b"From: "):
                c.author = line[len(b"From: ") :].rstrip()
            else:
                message_parts.append(b"\n" + line)
            first = False
        else:
            message_parts.append(line)
    c.message = b"".join(message_parts)

    diff_parts = []
    for line in line_iter:
        if line == b"-- \n":
            break
        diff_parts.append(line)
    diff = b"".join(diff_parts)

    try:
        version = next(line_iter).rstrip(b"\n")
    except StopIteration:
        version = None
    return c, diff, version