Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/attrs.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

171 statements  

1# attrs.py -- Git attributes for dulwich 

2# Copyright (C) 2019-2020 Collabora Ltd 

3# Copyright (C) 2019-2020 Andrej Shadura <andrew.shadura@collabora.co.uk> 

4# 

5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

6# General Public License as published by the Free Software Foundation; version 2.0 

7# or (at your option) any later version. You can redistribute it and/or 

8# modify it under the terms of either of these two licenses. 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15# 

16# You should have received a copy of the licenses; if not, see 

17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

19# License, Version 2.0. 

20# 

21 

22"""Parse .gitattributes file.""" 

23 

24import os 

25import re 

26from collections.abc import Generator, Iterator, Mapping, Sequence 

27from typing import IO 

28 

29AttributeValue = bytes | bool | None 

30 

31 

32def _parse_attr(attr: bytes) -> tuple[bytes, AttributeValue]: 

33 """Parse a git attribute into its value. 

34 

35 >>> _parse_attr(b'attr') 

36 (b'attr', True) 

37 >>> _parse_attr(b'-attr') 

38 (b'attr', False) 

39 >>> _parse_attr(b'!attr') 

40 (b'attr', None) 

41 >>> _parse_attr(b'attr=text') 

42 (b'attr', b'text') 

43 """ 

44 if attr.startswith(b"!"): 

45 return attr[1:], None 

46 if attr.startswith(b"-"): 

47 return attr[1:], False 

48 if b"=" not in attr: 

49 return attr, True 

50 # Split only on first = to handle values with = in them 

51 name, _, value = attr.partition(b"=") 

52 return name, value 

53 

54 

def parse_git_attributes(
    f: IO[bytes],
) -> Generator[tuple[bytes, Mapping[bytes, AttributeValue]], None, None]:
    """Parse the contents of a gitattributes file.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Every other line is split on whitespace: the first field is the
    pattern, the remaining fields are attribute tokens.

    Args:
      f: File-like object to read bytes from
    Returns:
      Generator of (pattern, attributes) tuples, in the order in which the
      patterns are encountered. Attribute names are bytes; values are bytes,
      True, False or None.

    >>> from io import BytesIO
    >>> list(parse_git_attributes(BytesIO(b'''*.tar.* filter=lfs diff=lfs merge=lfs -text
    ...
    ... # store signatures in Git
    ... *.tar.*.asc -filter -diff merge=binary -text
    ...
    ... # store .dsc verbatim
    ... *.dsc -filter !diff merge=binary !text
    ... '''))) #doctest: +NORMALIZE_WHITESPACE
    [(b'*.tar.*', {b'filter': b'lfs', b'diff': b'lfs', b'merge': b'lfs', b'text': False}),
     (b'*.tar.*.asc', {b'filter': False, b'diff': False, b'merge': b'binary', b'text': False}),
     (b'*.dsc', {b'filter': False, b'diff': None, b'merge': b'binary', b'text': None})]
    """
    for line in f:
        line = line.strip()

        # Blank lines are only there for readability; "#" starts a comment.
        if not line or line.startswith(b"#"):
            continue

        pattern, *attrs = line.split()

        # When an attribute is repeated on one line the last occurrence wins.
        yield pattern, dict(_parse_attr(a) for a in attrs)

91 

92 

93def _translate_pattern(pattern: bytes) -> bytes: 

94 """Translate a gitattributes pattern to a regular expression. 

95 

96 Similar to gitignore patterns, but simpler as gitattributes doesn't support 

97 all the same features (e.g., no directory-only patterns with trailing /). 

98 """ 

99 res = b"" 

100 i = 0 

101 n = len(pattern) 

102 

103 # If pattern doesn't contain /, it can match at any level 

104 if b"/" not in pattern: 

105 res = b"(?:.*/)??" 

106 elif pattern.startswith(b"/"): 

107 # Leading / means root of repository 

108 pattern = pattern[1:] 

109 n = len(pattern) 

110 

111 while i < n: 

112 c = pattern[i : i + 1] 

113 i += 1 

114 

115 if c == b"*": 

116 if i < n and pattern[i : i + 1] == b"*": 

117 # Double asterisk 

118 i += 1 

119 if i < n and pattern[i : i + 1] == b"/": 

120 # **/ - match zero or more directories 

121 res += b"(?:.*/)??" 

122 i += 1 

123 elif i == n: 

124 # ** at end - match everything 

125 res += b".*" 

126 else: 

127 # ** in middle 

128 res += b".*" 

129 else: 

130 # Single * - match any character except / 

131 res += b"[^/]*" 

132 elif c == b"?": 

133 res += b"[^/]" 

134 elif c == b"[": 

135 # Character class 

136 j = i 

137 if j < n and pattern[j : j + 1] == b"!": 

138 j += 1 

139 if j < n and pattern[j : j + 1] == b"]": 

140 j += 1 

141 while j < n and pattern[j : j + 1] != b"]": 

142 j += 1 

143 if j >= n: 

144 res += b"\\[" 

145 else: 

146 stuff = pattern[i:j].replace(b"\\", b"\\\\") 

147 i = j + 1 

148 if stuff.startswith(b"!"): 

149 stuff = b"^" + stuff[1:] 

150 elif stuff.startswith(b"^"): 

151 stuff = b"\\" + stuff 

152 res += b"[" + stuff + b"]" 

153 else: 

154 res += re.escape(c) 

155 

156 return res 

157 

158 

class Pattern:
    """A single gitattributes pattern, compiled to a regular expression."""

    def __init__(self, pattern: bytes):
        """Initialize Pattern.

        Args:
          pattern: Attribute pattern as bytes
        """
        # The raw pattern is kept so callers can compare and serialise it.
        self.pattern = pattern
        self._regex: re.Pattern[bytes]
        self._compile()

    def __repr__(self) -> str:
        """Return a debugging representation showing the raw pattern."""
        return f"{type(self).__name__}({self.pattern!r})"

    def _compile(self) -> None:
        """Compile the pattern to a regular expression."""
        regex_pattern = _translate_pattern(self.pattern)
        # Anchor both ends. "\Z" is used instead of "$" because "$" would
        # also match just before a trailing newline in the path, and DOTALL
        # lets "." (used for "**") cross newline bytes, which are ordinary
        # characters in a path.
        self._regex = re.compile(b"^" + regex_pattern + b"\\Z", re.DOTALL)

    def match(self, path: bytes) -> bool:
        """Check if path matches this pattern.

        Args:
          path: Path to check (relative to repository root, using / separators)

        Returns:
          True if path matches this pattern
        """
        # Normalize path: a single leading slash is ignored.
        if path.startswith(b"/"):
            path = path[1:]

        return self._regex.match(path) is not None

195 

196 

def match_path(
    patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]], path: bytes
) -> dict[bytes, AttributeValue]:
    """Collect the attributes that apply to a path.

    Patterns are evaluated in order, so an entry further down the sequence
    overrides what an earlier one set for the same attribute name.

    Args:
      patterns: List of (Pattern, attributes) tuples
      path: Path to match (relative to repository root)

    Returns:
      Dictionary of attributes that apply to this path
    """
    result: dict[bytes, AttributeValue] = {}

    for matcher, mapping in patterns:
        if not matcher.match(path):
            continue

        for key, setting in mapping.items():
            if setting is not None:
                result[key] = setting
            else:
                # None means "unspecified": forget any earlier setting.
                result.pop(key, None)

    return result

223 

224 

def parse_gitattributes_file(
    filename: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Parse a gitattributes file and return compiled patterns.

    Args:
      filename: Path to the .gitattributes file

    Returns:
      List of (Pattern, attributes) tuples, in file order

    Raises:
      OSError: If the file cannot be opened for reading
    """
    # open() accepts both str and bytes paths and applies the filesystem
    # encoding itself; forcing UTF-8 here would break on surrogate-escaped
    # names and on filesystems that use another encoding.
    with open(filename, "rb") as f:
        # The generator is consumed while the file is still open.
        return [
            (Pattern(pattern_bytes), attrs)
            for pattern_bytes, attrs in parse_git_attributes(f)
        ]

247 

248 

def read_gitattributes(
    path: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Read .gitattributes from a directory.

    Args:
      path: Directory path to check for .gitattributes

    Returns:
      List of (Pattern, attributes) tuples; empty if the directory has no
      .gitattributes file
    """
    # os.fsdecode uses the filesystem encoding and error handler, so byte
    # paths that are not valid UTF-8 do not raise UnicodeDecodeError.
    gitattributes_path = os.path.join(os.fsdecode(path), ".gitattributes")
    if not os.path.exists(gitattributes_path):
        return []

    return parse_gitattributes_file(gitattributes_path)

268 

269 

class GitAttributes:
    """A collection of gitattributes patterns that can match paths."""

    def __init__(
        self,
        patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] | None = None,
    ):
        """Initialize GitAttributes.

        Args:
          patterns: Optional list of (Pattern, attributes) tuples
        """
        # Copy the list so later add_patterns()/set_attribute() calls never
        # mutate a list that is still owned by the caller.
        self._patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] = (
            list(patterns) if patterns is not None else []
        )

    def match_path(self, path: bytes) -> dict[bytes, AttributeValue]:
        """Get attributes for a path by matching against patterns.

        Args:
          path: Path to match (relative to repository root)

        Returns:
          Dictionary of attributes that apply to this path
        """
        return match_path(self._patterns, path)

    def add_patterns(
        self, patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]]
    ) -> None:
        """Add patterns to the collection.

        Args:
          patterns: List of (Pattern, attributes) tuples to add
        """
        self._patterns.extend(patterns)

    def __len__(self) -> int:
        """Return the number of patterns."""
        return len(self._patterns)

    def __iter__(self) -> Iterator[tuple["Pattern", Mapping[bytes, AttributeValue]]]:
        """Iterate over (Pattern, attributes) tuples in insertion order."""
        return iter(self._patterns)

    @classmethod
    def from_file(cls, filename: str | bytes) -> "GitAttributes":
        """Create GitAttributes from a gitattributes file.

        Args:
          filename: Path to the .gitattributes file

        Returns:
          New GitAttributes instance
        """
        return cls(parse_gitattributes_file(filename))

    @classmethod
    def from_path(cls, path: str | bytes) -> "GitAttributes":
        """Create GitAttributes from .gitattributes in a directory.

        Args:
          path: Directory path to check for .gitattributes

        Returns:
          New GitAttributes instance
        """
        return cls(read_gitattributes(path))

    def set_attribute(self, pattern: bytes, name: bytes, value: AttributeValue) -> None:
        """Set an attribute for a pattern.

        The first entry whose raw pattern equals ``pattern`` is updated; if
        there is none, a new entry is appended.

        Args:
          pattern: The file pattern
          name: Attribute name
          value: Attribute value (bytes, True, False, or None)
        """
        for index, (existing, attrs) in enumerate(self._patterns):
            if existing.pattern == pattern:
                # The stored mapping may be read-only or shared, so update a
                # private copy and swap it in.
                updated = dict(attrs)
                updated[name] = value
                self._patterns[index] = (existing, updated)
                return

        self._patterns.append((Pattern(pattern), {name: value}))

    def remove_pattern(self, pattern: bytes) -> None:
        """Remove all attributes for a pattern.

        Args:
          pattern: The file pattern to remove
        """
        self._patterns = [
            (p, attrs) for p, attrs in self._patterns if p.pattern != pattern
        ]

    def to_bytes(self) -> bytes:
        """Convert GitAttributes to bytes format suitable for writing to file.

        Attributes are written sorted by name. Entries without any attribute
        produce no line.

        Returns:
          Bytes representation of the gitattributes file
        """
        lines = []
        for pattern_obj, attrs in self._patterns:
            attr_strs = []

            for name, value in sorted(attrs.items()):
                if value is True:
                    attr_strs.append(name)
                elif value is False:
                    attr_strs.append(b"-" + name)
                elif value is None:
                    attr_strs.append(b"!" + name)
                else:
                    # value is bytes
                    attr_strs.append(name + b"=" + value)

            if attr_strs:
                lines.append(pattern_obj.pattern + b" " + b" ".join(attr_strs))

        return b"\n".join(lines) + b"\n" if lines else b""

    def write_to_file(self, filename: str | bytes) -> None:
        """Write GitAttributes to a file.

        Args:
          filename: Path to write the .gitattributes file

        Raises:
          OSError: If the file cannot be opened for writing
        """
        content = self.to_bytes()
        # open() takes str and bytes paths alike and applies the filesystem
        # encoding itself, so the name is passed through unchanged.
        with open(filename, "wb") as f:
            f.write(content)