Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/attrs.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

171 statements  

1# attrs.py -- Git attributes for dulwich 

2# Copyright (C) 2019-2020 Collabora Ltd 

3# Copyright (C) 2019-2020 Andrej Shadura <andrew.shadura@collabora.co.uk> 

4# 

5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

6# General Public License as published by the Free Software Foundation; version 2.0 

7# or (at your option) any later version. You can redistribute it and/or 

8# modify it under the terms of either of these two licenses. 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15# 

16# You should have received a copy of the licenses; if not, see 

17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

19# License, Version 2.0. 

20# 

21 

22"""Parse .gitattributes file.""" 

23 

24import os 

25import re 

26from collections.abc import Generator, Iterator, Mapping 

27from typing import ( 

28 IO, 

29 Optional, 

30 Union, 

31) 

32 

# Value of a single parsed attribute, as produced by _parse_attr():
#   True  -> attribute is set        ("attr")
#   False -> attribute is unset      ("-attr")
#   None  -> attribute is unspecified ("!attr")
#   bytes -> attribute has a value   ("attr=value")
AttributeValue = Union[bytes, bool, None]

34 

35 

36def _parse_attr(attr: bytes) -> tuple[bytes, AttributeValue]: 

37 """Parse a git attribute into its value. 

38 

39 >>> _parse_attr(b'attr') 

40 (b'attr', True) 

41 >>> _parse_attr(b'-attr') 

42 (b'attr', False) 

43 >>> _parse_attr(b'!attr') 

44 (b'attr', None) 

45 >>> _parse_attr(b'attr=text') 

46 (b'attr', b'text') 

47 """ 

48 if attr.startswith(b"!"): 

49 return attr[1:], None 

50 if attr.startswith(b"-"): 

51 return attr[1:], False 

52 if b"=" not in attr: 

53 return attr, True 

54 # Split only on first = to handle values with = in them 

55 name, _, value = attr.partition(b"=") 

56 return name, value 

57 

58 

def parse_git_attributes(
    f: IO[bytes],
) -> Generator[tuple[bytes, Mapping[bytes, AttributeValue]], None, None]:
    """Parse the contents of a gitattributes file.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Every other line is split on whitespace: the first field is the
    pattern, each remaining field is parsed with ``_parse_attr``. If an
    attribute is repeated on one line, the last occurrence wins.

    Args:
      f: File-like object to read bytes lines from
    Yields:
      ``(pattern, attributes)`` tuples in the order they are encountered;
      both the attribute names and their textual values are bytes.

    >>> from io import BytesIO
    >>> list(parse_git_attributes(BytesIO(b'''*.tar.* filter=lfs diff=lfs merge=lfs -text
    ...
    ... # store signatures in Git
    ... *.tar.*.asc -filter -diff merge=binary -text
    ...
    ... # store .dsc verbatim
    ... *.dsc -filter !diff merge=binary !text
    ... '''))) #doctest: +NORMALIZE_WHITESPACE
    [(b'*.tar.*', {b'filter': b'lfs', b'diff': b'lfs', b'merge': b'lfs', b'text': False}),
     (b'*.tar.*.asc', {b'filter': False, b'diff': False, b'merge': b'binary', b'text': False}),
     (b'*.dsc', {b'filter': False, b'diff': None, b'merge': b'binary', b'text': None})]
    """
    for raw_line in f:
        line = raw_line.strip()

        # Blank lines are only there for readability; "#" starts a comment.
        if not line or line.startswith(b"#"):
            continue

        pattern, *attrs = line.split()
        yield pattern, dict(_parse_attr(attr) for attr in attrs)

95 

96 

97def _translate_pattern(pattern: bytes) -> bytes: 

98 """Translate a gitattributes pattern to a regular expression. 

99 

100 Similar to gitignore patterns, but simpler as gitattributes doesn't support 

101 all the same features (e.g., no directory-only patterns with trailing /). 

102 """ 

103 res = b"" 

104 i = 0 

105 n = len(pattern) 

106 

107 # If pattern doesn't contain /, it can match at any level 

108 if b"/" not in pattern: 

109 res = b"(?:.*/)??" 

110 elif pattern.startswith(b"/"): 

111 # Leading / means root of repository 

112 pattern = pattern[1:] 

113 n = len(pattern) 

114 

115 while i < n: 

116 c = pattern[i : i + 1] 

117 i += 1 

118 

119 if c == b"*": 

120 if i < n and pattern[i : i + 1] == b"*": 

121 # Double asterisk 

122 i += 1 

123 if i < n and pattern[i : i + 1] == b"/": 

124 # **/ - match zero or more directories 

125 res += b"(?:.*/)??" 

126 i += 1 

127 elif i == n: 

128 # ** at end - match everything 

129 res += b".*" 

130 else: 

131 # ** in middle 

132 res += b".*" 

133 else: 

134 # Single * - match any character except / 

135 res += b"[^/]*" 

136 elif c == b"?": 

137 res += b"[^/]" 

138 elif c == b"[": 

139 # Character class 

140 j = i 

141 if j < n and pattern[j : j + 1] == b"!": 

142 j += 1 

143 if j < n and pattern[j : j + 1] == b"]": 

144 j += 1 

145 while j < n and pattern[j : j + 1] != b"]": 

146 j += 1 

147 if j >= n: 

148 res += b"\\[" 

149 else: 

150 stuff = pattern[i:j].replace(b"\\", b"\\\\") 

151 i = j + 1 

152 if stuff.startswith(b"!"): 

153 stuff = b"^" + stuff[1:] 

154 elif stuff.startswith(b"^"): 

155 stuff = b"\\" + stuff 

156 res += b"[" + stuff + b"]" 

157 else: 

158 res += re.escape(c) 

159 

160 return res 

161 

162 

class Pattern:
    """A single gitattributes pattern, compiled to a regular expression."""

    def __init__(self, pattern: bytes):
        """Initialize Pattern.

        Args:
          pattern: Attribute pattern as bytes
        """
        self.pattern = pattern
        self._regex: Optional[re.Pattern[bytes]] = None
        self._compile()

    def __repr__(self) -> str:
        """Return a debugging representation showing the raw pattern."""
        return f"{type(self).__name__}({self.pattern!r})"

    def _compile(self) -> None:
        """Compile the pattern to a regular expression.

        No explicit anchors are added: match() uses fullmatch(), which
        requires the whole path to be consumed. DOTALL lets the ``.*``
        fragments produced by the translation span newline bytes, which are
        legal inside path names.
        """
        self._regex = re.compile(_translate_pattern(self.pattern), re.DOTALL)

    def match(self, path: bytes) -> bool:
        """Check if path matches this pattern.

        Args:
          path: Path to check (relative to repository root, using / separators)

        Returns:
          True if the entire path matches this pattern
        """
        # Normalize path: patterns are relative to the repository root.
        if path.startswith(b"/"):
            path = path[1:]

        assert self._regex is not None  # Always set by _compile()
        # fullmatch() rather than match() with a "$" anchor: "$" also matches
        # just before a trailing newline, which would wrongly accept a path
        # such as b"foo.txt\n" for the pattern b"*.txt".
        return self._regex.fullmatch(path) is not None

199 

200 

def match_path(
    patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]], path: bytes
) -> dict[bytes, AttributeValue]:
    """Resolve the attributes that apply to a path.

    Patterns are evaluated in list order, so an entry further down the list
    overrides what an earlier one assigned.

    Args:
      patterns: List of (Pattern, attributes) tuples
      path: Path to match (relative to repository root)

    Returns:
      Dictionary of attributes that apply to this path
    """
    resolved: dict[bytes, AttributeValue] = {}

    applicable = (attrs for candidate, attrs in patterns if candidate.match(path))
    for attrs in applicable:
        for name, value in attrs.items():
            if value is not None:
                resolved[name] = value
            else:
                # None means "unspecified": drop anything set earlier.
                resolved.pop(name, None)

    return resolved

227 

228 

def parse_gitattributes_file(
    filename: Union[str, bytes],
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Parse a gitattributes file and return compiled patterns.

    Args:
      filename: Path to the .gitattributes file

    Returns:
      List of (Pattern, attributes) tuples, in file order

    Raises:
      OSError: If the file cannot be opened or read
    """
    # open() accepts both str and bytes paths and applies the filesystem
    # encoding itself. Forcing str through UTF-8 would raise for
    # surrogate-escaped names and pick the wrong bytes on non-UTF-8 systems.
    with open(filename, "rb") as f:
        return [
            (Pattern(pattern_bytes), attrs)
            for pattern_bytes, attrs in parse_git_attributes(f)
        ]

251 

252 

def read_gitattributes(
    path: Union[str, bytes],
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Read .gitattributes from a directory.

    Args:
      path: Directory path to check for .gitattributes

    Returns:
      List of (Pattern, attributes) tuples; empty if the directory has no
      .gitattributes file
    """
    # os.fsdecode() uses the filesystem encoding and error handler, so a
    # bytes directory name that is not valid UTF-8 no longer raises
    # UnicodeDecodeError. A str path is returned unchanged.
    gitattributes_path = os.path.join(os.fsdecode(path), ".gitattributes")
    if os.path.exists(gitattributes_path):
        return parse_gitattributes_file(gitattributes_path)

    return []

272 

273 

class GitAttributes:
    """A collection of gitattributes patterns that can match paths."""

    def __init__(
        self,
        patterns: Optional[list[tuple[Pattern, Mapping[bytes, AttributeValue]]]] = None,
    ):
        """Initialize GitAttributes.

        Args:
          patterns: Optional list of (Pattern, attributes) tuples
        """
        # Copy the list so that add_patterns()/set_attribute() never mutate
        # a list the caller still owns.
        self._patterns = list(patterns) if patterns else []

    def match_path(self, path: bytes) -> dict[bytes, AttributeValue]:
        """Get attributes for a path by matching against patterns.

        Args:
          path: Path to match (relative to repository root)

        Returns:
          Dictionary of attributes that apply to this path
        """
        # Delegates to the module-level match_path() function.
        return match_path(self._patterns, path)

    def add_patterns(
        self, patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]]
    ) -> None:
        """Add patterns to the end of the collection.

        Args:
          patterns: List of (Pattern, attributes) tuples to add
        """
        self._patterns.extend(patterns)

    def __len__(self) -> int:
        """Return the number of patterns."""
        return len(self._patterns)

    def __iter__(self) -> Iterator[tuple["Pattern", Mapping[bytes, AttributeValue]]]:
        """Iterate over (Pattern, attributes) tuples in order."""
        return iter(self._patterns)

    @classmethod
    def from_file(cls, filename: Union[str, bytes]) -> "GitAttributes":
        """Create GitAttributes from a gitattributes file.

        Args:
          filename: Path to the .gitattributes file

        Returns:
          New GitAttributes instance
        """
        return cls(parse_gitattributes_file(filename))

    @classmethod
    def from_path(cls, path: Union[str, bytes]) -> "GitAttributes":
        """Create GitAttributes from .gitattributes in a directory.

        Args:
          path: Directory path to check for .gitattributes

        Returns:
          New GitAttributes instance
        """
        return cls(read_gitattributes(path))

    def set_attribute(self, pattern: bytes, name: bytes, value: AttributeValue) -> None:
        """Set an attribute for a pattern.

        The first entry whose raw pattern equals ``pattern`` is updated; if
        there is none, a new entry is appended.

        Args:
          pattern: The file pattern
          name: Attribute name
          value: Attribute value (bytes, True, False, or None)
        """
        for index, (existing, attrs) in enumerate(self._patterns):
            if existing.pattern == pattern:
                # The stored mapping may be read-only or shared with the
                # caller, so replace it with an updated copy.
                updated = dict(attrs)
                updated[name] = value
                self._patterns[index] = (existing, updated)
                return

        self._patterns.append((Pattern(pattern), {name: value}))

    def remove_pattern(self, pattern: bytes) -> None:
        """Remove all attributes for a pattern.

        Args:
          pattern: The file pattern to remove
        """
        self._patterns = [
            (p, attrs) for p, attrs in self._patterns if p.pattern != pattern
        ]

    def to_bytes(self) -> bytes:
        """Convert GitAttributes to bytes format suitable for writing to file.

        Attributes on each line are emitted sorted by name. Entries without
        any attributes produce no line.

        Returns:
          Bytes representation of the gitattributes file
        """
        lines = []
        for pattern_obj, attrs in self._patterns:
            attr_strs = []
            for name, value in sorted(attrs.items()):
                if value is True:
                    attr_strs.append(name)
                elif value is False:
                    attr_strs.append(b"-" + name)
                elif value is None:
                    attr_strs.append(b"!" + name)
                else:
                    # value is bytes
                    attr_strs.append(name + b"=" + value)

            if attr_strs:
                lines.append(pattern_obj.pattern + b" " + b" ".join(attr_strs))

        return b"\n".join(lines) + b"\n" if lines else b""

    def write_to_file(self, filename: Union[str, bytes]) -> None:
        """Write GitAttributes to a file.

        Args:
          filename: Path to write the .gitattributes file
        """
        # open() handles str and bytes paths itself; encoding a str name as
        # UTF-8 by hand would bypass the filesystem encoding.
        with open(filename, "wb") as f:
            f.write(self.to_bytes())