Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/attrs.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

172 statements  

1# attrs.py -- Git attributes for dulwich 

2# Copyright (C) 2019-2020 Collabora Ltd 

3# Copyright (C) 2019-2020 Andrej Shadura <andrew.shadura@collabora.co.uk> 

4# 

5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

6# General Public License as published by the Free Software Foundation; version 2.0 

7# or (at your option) any later version. You can redistribute it and/or 

8# modify it under the terms of either of these two licenses. 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15# 

16# You should have received a copy of the licenses; if not, see 

17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

19# License, Version 2.0. 

20# 

21 

22"""Parse .gitattributes file.""" 

23 

# Names exported by ``from dulwich.attrs import *``.
__all__ = [
    "AttributeValue",
    "GitAttributes",
    "Pattern",
    "match_path",
    "parse_git_attributes",
    "parse_gitattributes_file",
    "read_gitattributes",
]

33 

34import os 

35import re 

36from collections.abc import Generator, Iterator, Mapping, Sequence 

37from typing import IO 

38 

# Value of a single attribute as produced by ``_parse_attr``: ``True`` for a
# bare ``attr``, ``False`` for ``-attr``, ``None`` for ``!attr`` and the raw
# bytes after the first ``=`` for ``attr=value``.
AttributeValue = bytes | bool | None

40 

41 

def _parse_attr(attr: bytes) -> tuple[bytes, AttributeValue]:
    """Split one attribute token into its name and its value.

    A leading ``!`` yields ``None``, a leading ``-`` yields ``False``,
    ``name=value`` yields the bytes after the first ``=`` and a bare name
    yields ``True``.

    >>> _parse_attr(b'attr')
    (b'attr', True)
    >>> _parse_attr(b'-attr')
    (b'attr', False)
    >>> _parse_attr(b'!attr')
    (b'attr', None)
    >>> _parse_attr(b'attr=text')
    (b'attr', b'text')
    """
    marker, remainder = attr[:1], attr[1:]
    if marker == b"!":
        return remainder, None
    if marker == b"-":
        return remainder, False

    # partition() cuts at the first "=" only, so the value may itself
    # contain further "=" characters.
    key, separator, text = attr.partition(b"=")
    if not separator:
        return attr, True
    return key, text

63 

64 

def parse_git_attributes(
    f: IO[bytes],
) -> Generator[tuple[bytes, Mapping[bytes, AttributeValue]], None, None]:
    """Parse the contents of a gitattributes file.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.  Every other line is split on whitespace: the first field is the
    pattern and each remaining field is parsed with ``_parse_attr``.

    Args:
      f: File-like object to read bytes from
    Returns:
      Generator of ``(pattern, attributes)`` tuples in the order in which they
      are encountered; the pattern and the attribute names are bytes
    >>> from io import BytesIO
    >>> list(parse_git_attributes(BytesIO(b'''*.tar.* filter=lfs diff=lfs merge=lfs -text
    ...
    ... # store signatures in Git
    ... *.tar.*.asc -filter -diff merge=binary -text
    ...
    ... # store .dsc verbatim
    ... *.dsc -filter !diff merge=binary !text
    ... '''))) #doctest: +NORMALIZE_WHITESPACE
    [(b'*.tar.*', {b'filter': b'lfs', b'diff': b'lfs', b'merge': b'lfs', b'text': False}),
     (b'*.tar.*.asc', {b'filter': False, b'diff': False, b'merge': b'binary', b'text': False}),
     (b'*.dsc', {b'filter': False, b'diff': None, b'merge': b'binary', b'text': None})]
    """
    for raw_line in f:
        line = raw_line.strip()

        # Blank lines are only used for readability and "#" starts a comment.
        if not line or line.startswith(b"#"):
            continue

        pattern, *attrs = line.split()

        # If a name is repeated on one line, the last occurrence wins.
        yield pattern, dict(map(_parse_attr, attrs))

101 

102 

103def _translate_pattern(pattern: bytes) -> bytes: 

104 """Translate a gitattributes pattern to a regular expression. 

105 

106 Similar to gitignore patterns, but simpler as gitattributes doesn't support 

107 all the same features (e.g., no directory-only patterns with trailing /). 

108 """ 

109 res = b"" 

110 i = 0 

111 n = len(pattern) 

112 

113 # If pattern doesn't contain /, it can match at any level 

114 if b"/" not in pattern: 

115 res = b"(?:.*/)??" 

116 elif pattern.startswith(b"/"): 

117 # Leading / means root of repository 

118 pattern = pattern[1:] 

119 n = len(pattern) 

120 

121 while i < n: 

122 c = pattern[i : i + 1] 

123 i += 1 

124 

125 if c == b"*": 

126 if i < n and pattern[i : i + 1] == b"*": 

127 # Double asterisk 

128 i += 1 

129 if i < n and pattern[i : i + 1] == b"/": 

130 # **/ - match zero or more directories 

131 res += b"(?:.*/)??" 

132 i += 1 

133 elif i == n: 

134 # ** at end - match everything 

135 res += b".*" 

136 else: 

137 # ** in middle 

138 res += b".*" 

139 else: 

140 # Single * - match any character except / 

141 res += b"[^/]*" 

142 elif c == b"?": 

143 res += b"[^/]" 

144 elif c == b"[": 

145 # Character class 

146 j = i 

147 if j < n and pattern[j : j + 1] == b"!": 

148 j += 1 

149 if j < n and pattern[j : j + 1] == b"]": 

150 j += 1 

151 while j < n and pattern[j : j + 1] != b"]": 

152 j += 1 

153 if j >= n: 

154 res += b"\\[" 

155 else: 

156 stuff = pattern[i:j].replace(b"\\", b"\\\\") 

157 i = j + 1 

158 if stuff.startswith(b"!"): 

159 stuff = b"^" + stuff[1:] 

160 elif stuff.startswith(b"^"): 

161 stuff = b"\\" + stuff 

162 res += b"[" + stuff + b"]" 

163 else: 

164 res += re.escape(c) 

165 

166 return res 

167 

168 

class Pattern:
    """One gitattributes pattern together with its compiled regex."""

    def __init__(self, pattern: bytes):
        """Store the raw pattern and compile it immediately.

        Args:
          pattern: Attribute pattern as bytes
        """
        self.pattern = pattern
        self._regex: re.Pattern[bytes] | None = None
        self._compile()

    def _compile(self) -> None:
        """Build the anchored regular expression for ``self.pattern``."""
        # Anchor on both ends so the whole path has to match.
        anchored = b"^" + _translate_pattern(self.pattern) + b"$"
        self._regex = re.compile(anchored)

    def match(self, path: bytes) -> bool:
        """Tell whether ``path`` is matched by this pattern.

        Args:
          path: Path to check (relative to repository root, using / separators)

        Returns:
          True if path matches this pattern
        """
        # A single leading slash is dropped before matching.
        relative = path.removeprefix(b"/")

        assert self._regex is not None  # Always set by _compile()
        return self._regex.match(relative) is not None

205 

206 

def match_path(
    patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]], path: bytes
) -> dict[bytes, AttributeValue]:
    """Collect the attributes that apply to ``path``.

    The patterns are visited in order, so a later matching pattern overrides
    what an earlier one set.  A value of ``None`` removes the attribute from
    the result instead of storing it.

    Args:
      patterns: List of (Pattern, attributes) tuples
      path: Path to match (relative to repository root)

    Returns:
      Dictionary of attributes that apply to this path
    """
    result: dict[bytes, AttributeValue] = {}

    for matcher, attr_map in patterns:
        if not matcher.match(path):
            continue
        for key, val in attr_map.items():
            if val is not None:
                result[key] = val
            else:
                # Unspecified: forget anything an earlier pattern set.
                result.pop(key, None)

    return result

233 

234 

def parse_gitattributes_file(
    filename: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Parse a gitattributes file and return compiled patterns.

    Args:
      filename: Path to the .gitattributes file

    Returns:
      List of (Pattern, attributes) tuples, in file order

    Raises:
      OSError: If the file cannot be opened
    """
    # open() takes str and bytes paths alike and applies the filesystem
    # encoding itself.  Re-encoding a str name as UTF-8 would name the wrong
    # file on a non-UTF-8 filesystem and fails on surrogate-escaped names.
    with open(filename, "rb") as f:
        return [
            (Pattern(pattern_bytes), attrs)
            for pattern_bytes, attrs in parse_git_attributes(f)
        ]

257 

258 

def read_gitattributes(
    path: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Read .gitattributes from a directory.

    Args:
      path: Directory path to check for .gitattributes

    Returns:
      List of (Pattern, attributes) tuples; empty when the directory has no
      .gitattributes file
    """
    # os.fsdecode() leaves str untouched and decodes bytes with the
    # filesystem encoding and error handler, so directory names that are not
    # valid UTF-8 no longer raise UnicodeDecodeError.
    gitattributes_path = os.path.join(os.fsdecode(path), ".gitattributes")
    if os.path.exists(gitattributes_path):
        return parse_gitattributes_file(gitattributes_path)

    return []

278 

279 

class GitAttributes:
    """A collection of gitattributes patterns that can match paths."""

    def __init__(
        self,
        patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] | None = None,
    ):
        """Initialize GitAttributes.

        Args:
          patterns: Optional list of (Pattern, attributes) tuples
        """
        # Keep a private copy so that add_patterns() / set_attribute() never
        # modify the list object owned by the caller.
        self._patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] = (
            list(patterns) if patterns is not None else []
        )

    def match_path(self, path: bytes) -> dict[bytes, AttributeValue]:
        """Get attributes for a path by matching against patterns.

        Args:
          path: Path to match (relative to repository root)

        Returns:
          Dictionary of attributes that apply to this path
        """
        return match_path(self._patterns, path)

    def add_patterns(
        self, patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]]
    ) -> None:
        """Append patterns to the end of the collection.

        Args:
          patterns: List of (Pattern, attributes) tuples to add
        """
        self._patterns.extend(patterns)

    def __len__(self) -> int:
        """Return the number of patterns."""
        return len(self._patterns)

    def __iter__(self) -> Iterator[tuple["Pattern", Mapping[bytes, AttributeValue]]]:
        """Iterate over the (Pattern, attributes) tuples in order."""
        return iter(self._patterns)

    @classmethod
    def from_file(cls, filename: str | bytes) -> "GitAttributes":
        """Create GitAttributes from a gitattributes file.

        Args:
          filename: Path to the .gitattributes file

        Returns:
          New GitAttributes instance
        """
        return cls(parse_gitattributes_file(filename))

    @classmethod
    def from_path(cls, path: str | bytes) -> "GitAttributes":
        """Create GitAttributes from .gitattributes in a directory.

        Args:
          path: Directory path to check for .gitattributes

        Returns:
          New GitAttributes instance
        """
        return cls(read_gitattributes(path))

    def set_attribute(self, pattern: bytes, name: bytes, value: AttributeValue) -> None:
        """Set an attribute for a pattern.

        The first entry whose raw pattern equals ``pattern`` is updated; if
        there is none, a new entry is appended.

        Args:
          pattern: The file pattern
          name: Attribute name
          value: Attribute value (bytes, True, False, or None)
        """
        for index, (pattern_obj, attrs) in enumerate(self._patterns):
            if pattern_obj.pattern == pattern:
                # The stored mapping may be read-only or shared with the
                # caller, so store an updated copy instead of mutating it.
                updated = dict(attrs)
                updated[name] = value
                self._patterns[index] = (pattern_obj, updated)
                return

        self._patterns.append((Pattern(pattern), {name: value}))

    def remove_pattern(self, pattern: bytes) -> None:
        """Remove every entry whose raw pattern equals ``pattern``.

        Args:
          pattern: The file pattern to remove
        """
        self._patterns = [
            (p, attrs) for p, attrs in self._patterns if p.pattern != pattern
        ]

    def to_bytes(self) -> bytes:
        """Convert GitAttributes to bytes format suitable for writing to file.

        Attributes are written sorted by name.  Entries without any attribute
        are left out.

        Returns:
          Bytes representation of the gitattributes file (empty when there is
          nothing to write)
        """
        lines = []
        for pattern_obj, attrs in self._patterns:
            attr_strs = []
            for name, value in sorted(attrs.items()):
                if value is True:
                    attr_strs.append(name)
                elif value is False:
                    attr_strs.append(b"-" + name)
                elif value is None:
                    attr_strs.append(b"!" + name)
                else:
                    # value is bytes
                    attr_strs.append(name + b"=" + value)

            if attr_strs:
                lines.append(pattern_obj.pattern + b" " + b" ".join(attr_strs))

        if not lines:
            return b""
        return b"\n".join(lines) + b"\n"

    def write_to_file(self, filename: str | bytes) -> None:
        """Write GitAttributes to a file.

        Args:
          filename: Path to write the .gitattributes file
        """
        content = self.to_bytes()
        # open() handles str and bytes paths itself; a str name is passed
        # through so the filesystem encoding applies rather than forced UTF-8.
        with open(filename, "wb") as f:
            f.write(content)