Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/attrs.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

172 statements  

1# attrs.py -- Git attributes for dulwich 

2# Copyright (C) 2019-2020 Collabora Ltd 

3# Copyright (C) 2019-2020 Andrej Shadura <andrew.shadura@collabora.co.uk> 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23"""Parse .gitattributes file.""" 

24 

# Public names exported by a star-import of this module.
__all__ = [
    "AttributeValue",
    "GitAttributes",
    "Pattern",
    "match_path",
    "parse_git_attributes",
    "parse_gitattributes_file",
    "read_gitattributes",
]

34 

35import os 

36import re 

37from collections.abc import Generator, Iterator, Mapping, Sequence 

38from typing import IO 

39 

# Value of one parsed attribute, keyed by how it was written in the file:
#   b"attr"       -> True
#   b"-attr"      -> False
#   b"!attr"      -> None
#   b"attr=text"  -> b"text"
AttributeValue = bytes | bool | None

41 

42 

def _parse_attr(attr: bytes) -> tuple[bytes, AttributeValue]:
    """Split one attribute token into its name and value.

    A leading ``!`` or ``-`` takes precedence over any ``=`` in the token.

    >>> _parse_attr(b'attr')
    (b'attr', True)
    >>> _parse_attr(b'-attr')
    (b'attr', False)
    >>> _parse_attr(b'!attr')
    (b'attr', None)
    >>> _parse_attr(b'attr=text')
    (b'attr', b'text')
    """
    marker, remainder = attr[:1], attr[1:]
    if marker == b"!":
        return remainder, None
    if marker == b"-":
        return remainder, False

    # partition() splits on the first "=" only, so values may contain "=".
    name, separator, value = attr.partition(b"=")
    if separator:
        return name, value
    return attr, True

64 

65 

def parse_git_attributes(
    f: IO[bytes],
) -> Generator[tuple[bytes, Mapping[bytes, AttributeValue]], None, None]:
    """Parse the contents of a gitattributes file.

    Blank lines and lines whose first non-whitespace byte is ``#`` are
    skipped. Every other line is split on whitespace: the first field is the
    pattern and each remaining field is one attribute.

    Args:
      f: File-like object to read bytes from
    Yields:
      ``(pattern, attributes)`` tuples in the order in which they are
      encountered. Attribute names are bytes; values are bytes, True, False
      or None.

    >>> from io import BytesIO
    >>> list(parse_git_attributes(BytesIO(b'''*.tar.* filter=lfs diff=lfs merge=lfs -text
    ...
    ... # store signatures in Git
    ... *.tar.*.asc -filter -diff merge=binary -text
    ...
    ... # store .dsc verbatim
    ... *.dsc -filter !diff merge=binary !text
    ... '''))) #doctest: +NORMALIZE_WHITESPACE
    [(b'*.tar.*', {b'filter': b'lfs', b'diff': b'lfs', b'merge': b'lfs', b'text': False}),
     (b'*.tar.*.asc', {b'filter': False, b'diff': False, b'merge': b'binary', b'text': False}),
     (b'*.dsc', {b'filter': False, b'diff': None, b'merge': b'binary', b'text': None})]
    """
    for raw_line in f:
        line = raw_line.strip()

        # Blank lines are only there for readability; "#" starts a comment.
        if not line or line.startswith(b"#"):
            continue

        pattern, *attrs = line.split()

        # If an attribute is repeated on one line, the last occurrence wins.
        yield pattern, dict(_parse_attr(attr) for attr in attrs)

102 

103 

def _translate_pattern(pattern: bytes) -> bytes:
    """Convert a gitattributes glob into regular-expression source.

    The returned expression carries no anchors; the caller adds them.
    Handled syntax is ``*``, ``?``, ``**`` and ``[...]`` character classes;
    every other byte is escaped and matched literally.
    """
    pieces: list[bytes] = []

    if b"/" not in pattern:
        # A pattern without a slash may match at any directory depth.
        pieces.append(b"(?:.*/)??")
    elif pattern.startswith(b"/"):
        # A leading slash ties the pattern to the repository root.
        pattern = pattern[1:]

    end = len(pattern)
    pos = 0
    while pos < end:
        char = pattern[pos : pos + 1]
        pos += 1

        if char == b"?":
            pieces.append(b"[^/]")
        elif char == b"*":
            # Slicing past the end gives b"", so no explicit bounds check.
            if pattern[pos : pos + 1] != b"*":
                # Single "*": anything except a directory separator.
                pieces.append(b"[^/]*")
            else:
                pos += 1
                if pattern[pos : pos + 1] == b"/":
                    # "**/": zero or more leading directories.
                    pieces.append(b"(?:.*/)??")
                    pos += 1
                else:
                    # "**" at the end or in the middle: match anything.
                    pieces.append(b".*")
        elif char == b"[":
            # Locate the closing bracket. A "!" negation marker and a "]"
            # placed first are part of the class, not its terminator.
            close = pos
            if pattern[close : close + 1] == b"!":
                close += 1
            if pattern[close : close + 1] == b"]":
                close += 1
            close = pattern.find(b"]", close)

            if close < 0:
                # Unterminated class: treat the "[" as a literal.
                pieces.append(b"\\[")
            else:
                members = pattern[pos:close].replace(b"\\", b"\\\\")
                pos = close + 1
                if members.startswith(b"!"):
                    members = b"^" + members[1:]
                elif members.startswith(b"^"):
                    members = b"\\" + members
                pieces.append(b"[" + members + b"]")
        else:
            pieces.append(re.escape(char))

    return b"".join(pieces)

168 

169 

class Pattern:
    """A single gitattributes pattern, compiled to a regular expression."""

    def __init__(self, pattern: bytes):
        """Initialize Pattern.

        Args:
          pattern: Attribute pattern as bytes
        """
        self.pattern = pattern
        self._compile()

    def _compile(self) -> None:
        """Compile the pattern to a regular expression.

        The expression is anchored at both ends. ``\\Z`` is used rather than
        ``$`` because ``$`` also matches just before a trailing newline, and
        ``re.DOTALL`` lets ``.`` match newline bytes inside a path.
        """
        translated = _translate_pattern(self.pattern)
        self._regex: re.Pattern[bytes] = re.compile(
            b"^" + translated + b"\\Z", re.DOTALL
        )

    def match(self, path: bytes) -> bool:
        """Check if path matches this pattern.

        Args:
          path: Path to check (relative to repository root, using / separators)

        Returns:
          True if the whole path matches this pattern
        """
        # Normalize: one leading slash is ignored.
        if path.startswith(b"/"):
            path = path[1:]

        # fullmatch() requires the entire path to be consumed.
        return self._regex.fullmatch(path) is not None

206 

207 

def match_path(
    patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]], path: bytes
) -> dict[bytes, AttributeValue]:
    """Collect the attributes that apply to a path.

    Patterns are evaluated in order, so a later matching pattern overrides
    what an earlier one assigned.

    Args:
      patterns: List of (Pattern, attributes) tuples
      path: Path to match (relative to repository root)

    Returns:
      Dictionary of attributes that apply to this path
    """
    effective: dict[bytes, AttributeValue] = {}

    for matcher, assigned in patterns:
        if not matcher.match(path):
            continue

        for name, value in assigned.items():
            if value is not None:
                effective[name] = value
            else:
                # None means "unspecified": forget any earlier setting.
                effective.pop(name, None)

    return effective

234 

235 

def parse_gitattributes_file(
    filename: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Parse a gitattributes file and return compiled patterns.

    Args:
      filename: Path to the .gitattributes file

    Returns:
      List of (Pattern, attributes) tuples, in file order

    Raises:
      OSError: If the file cannot be opened
    """
    # open() accepts str and bytes paths alike. The name is passed through
    # unchanged because forcing a str through UTF-8 fails on surrogate-escaped
    # names and ignores the filesystem encoding.
    with open(filename, "rb") as f:
        return [
            (Pattern(pattern_bytes), attrs)
            for pattern_bytes, attrs in parse_git_attributes(f)
        ]

258 

259 

def read_gitattributes(
    path: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Read .gitattributes from a directory.

    Args:
      path: Directory path to check for .gitattributes

    Returns:
      List of (Pattern, attributes) tuples; empty if the directory has no
      .gitattributes file
    """
    # os.fsdecode() applies the filesystem encoding and error handler, so a
    # bytes path that is not valid UTF-8 does not raise UnicodeDecodeError.
    gitattributes_path = os.path.join(os.fsdecode(path), ".gitattributes")
    if os.path.exists(gitattributes_path):
        return parse_gitattributes_file(gitattributes_path)

    return []

279 

280 

class GitAttributes:
    """An ordered collection of gitattributes patterns that can match paths.

    Each entry is a ``(Pattern, attributes)`` tuple. Entries keep the order in
    which they were supplied or added.
    """

    def __init__(
        self,
        patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] | None = None,
    ):
        """Initialize GitAttributes.

        Args:
          patterns: Optional list of (Pattern, attributes) tuples. The list is
            copied, so later changes to this collection never modify the
            caller's list.
        """
        self._patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] = (
            [] if patterns is None else list(patterns)
        )

    def match_path(self, path: bytes) -> dict[bytes, AttributeValue]:
        """Get attributes for a path by matching against patterns.

        Args:
          path: Path to match (relative to repository root)

        Returns:
          Dictionary of attributes that apply to this path
        """
        # Delegates to the module-level match_path() function.
        return match_path(self._patterns, path)

    def add_patterns(
        self, patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]]
    ) -> None:
        """Add patterns to the end of the collection.

        Args:
          patterns: List of (Pattern, attributes) tuples to add
        """
        self._patterns.extend(patterns)

    def __len__(self) -> int:
        """Return the number of patterns."""
        return len(self._patterns)

    def __iter__(self) -> Iterator[tuple["Pattern", Mapping[bytes, AttributeValue]]]:
        """Iterate over (Pattern, attributes) tuples in order."""
        return iter(self._patterns)

    @classmethod
    def from_file(cls, filename: str | bytes) -> "GitAttributes":
        """Create GitAttributes from a gitattributes file.

        Args:
          filename: Path to the .gitattributes file

        Returns:
          New GitAttributes instance
        """
        return cls(parse_gitattributes_file(filename))

    @classmethod
    def from_path(cls, path: str | bytes) -> "GitAttributes":
        """Create GitAttributes from .gitattributes in a directory.

        Args:
          path: Directory path to check for .gitattributes

        Returns:
          New GitAttributes instance
        """
        return cls(read_gitattributes(path))

    def set_attribute(self, pattern: bytes, name: bytes, value: AttributeValue) -> None:
        """Set an attribute for a pattern.

        The first entry whose pattern text equals ``pattern`` is updated; if
        there is none, a new entry is appended.

        Args:
          pattern: The file pattern
          name: Attribute name
          value: Attribute value (bytes, True, False, or None)
        """
        for index, (existing, attrs) in enumerate(self._patterns):
            if existing.pattern == pattern:
                # The stored mapping may be read-only or shared with the
                # caller, so replace the entry with an updated copy.
                self._patterns[index] = (existing, {**attrs, name: value})
                return

        self._patterns.append((Pattern(pattern), {name: value}))

    def remove_pattern(self, pattern: bytes) -> None:
        """Remove every entry whose pattern text equals ``pattern``.

        Args:
          pattern: The file pattern to remove
        """
        self._patterns = [
            (p, attrs) for p, attrs in self._patterns if p.pattern != pattern
        ]

    def to_bytes(self) -> bytes:
        """Convert GitAttributes to bytes format suitable for writing to file.

        Attributes are written sorted by name. Entries without any attribute
        are omitted.

        Returns:
          Bytes representation of the gitattributes file; empty if there is
          nothing to write
        """
        lines = []
        for pattern_obj, attrs in self._patterns:
            fields = [pattern_obj.pattern]

            for name, value in sorted(attrs.items()):
                if value is True:
                    fields.append(name)
                elif value is False:
                    fields.append(b"-" + name)
                elif value is None:
                    fields.append(b"!" + name)
                else:
                    # value is bytes
                    fields.append(name + b"=" + value)

            # Only the pattern itself means there was no attribute to write.
            if len(fields) > 1:
                lines.append(b" ".join(fields))

        return b"".join(line + b"\n" for line in lines)

    def write_to_file(self, filename: str | bytes) -> None:
        """Write GitAttributes to a file.

        Args:
          filename: Path to write the .gitattributes file
        """
        # open() accepts str and bytes paths alike; the name is passed
        # through unchanged instead of being forced through UTF-8.
        with open(filename, "wb") as f:
            f.write(self.to_bytes())