Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/attrs.py: 20%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# attrs.py -- Git attributes for dulwich
2# Copyright (C) 2019-2020 Collabora Ltd
3# Copyright (C) 2019-2020 Andrej Shadura <andrew.shadura@collabora.co.uk>
4#
5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6# General Public License as published by the Free Software Foundation; version 2.0
7# or (at your option) any later version. You can redistribute it and/or
8# modify it under the terms of either of these two licenses.
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# You should have received a copy of the licenses; if not, see
17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19# License, Version 2.0.
20#
22"""Parse .gitattributes file."""
24import os
25import re
26from collections.abc import Generator, Iterator, Mapping, Sequence
27from typing import IO
29AttributeValue = bytes | bool | None
def _parse_attr(attr: bytes) -> tuple[bytes, AttributeValue]:
    """Split one attribute token into its name and state.

    A leading ``!`` marks the attribute as unspecified (``None``), a leading
    ``-`` marks it as unset (``False``), ``name=value`` carries a bytes value
    and a bare name means the attribute is set (``True``).

    >>> _parse_attr(b'attr')
    (b'attr', True)
    >>> _parse_attr(b'-attr')
    (b'attr', False)
    >>> _parse_attr(b'!attr')
    (b'attr', None)
    >>> _parse_attr(b'attr=text')
    (b'attr', b'text')
    """
    marker = attr[:1]
    if marker == b"!":
        return attr[1:], None
    if marker == b"-":
        return attr[1:], False

    # partition() splits on the first "=" only, so values may contain "=".
    name, separator, value = attr.partition(b"=")
    if not separator:
        return attr, True
    return name, value
def parse_git_attributes(
    f: IO[bytes],
) -> Generator[tuple[bytes, Mapping[bytes, AttributeValue]], None, None]:
    """Parse the contents of a gitattributes file.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    starting with ``#`` are skipped. Every remaining line is split on
    whitespace: the first token is the pattern, the rest are attribute
    tokens handed to ``_parse_attr``. If an attribute name is repeated on a
    line, the last occurrence wins.

    Args:
      f: File-like object to read bytes from
    Yields:
      Tuples of (pattern, attributes) in the order they are encountered,
      where attributes maps attribute names (bytes) to their values

    >>> from io import BytesIO
    >>> list(parse_git_attributes(BytesIO(b'''*.tar.* filter=lfs diff=lfs merge=lfs -text
    ...
    ... # store signatures in Git
    ... *.tar.*.asc -filter -diff merge=binary -text
    ...
    ... # store .dsc verbatim
    ... *.dsc -filter !diff merge=binary !text
    ... '''))) #doctest: +NORMALIZE_WHITESPACE
    [(b'*.tar.*', {b'filter': b'lfs', b'diff': b'lfs', b'merge': b'lfs', b'text': False}),
     (b'*.tar.*.asc', {b'filter': False, b'diff': False, b'merge': b'binary', b'text': False}),
     (b'*.dsc', {b'filter': False, b'diff': None, b'merge': b'binary', b'text': None})]
    """
    for raw_line in f:
        line = raw_line.strip()

        # Blank lines are only there for readability; "#" starts a comment.
        if not line or line.startswith(b"#"):
            continue

        pattern, *attrs = line.split()
        yield pattern, dict(_parse_attr(attr) for attr in attrs)
def _translate_pattern(pattern: bytes) -> bytes:
    """Translate a gitattributes pattern to a regular expression.

    Similar to gitignore patterns, but simpler as gitattributes doesn't support
    all the same features (e.g., no directory-only patterns with trailing /).

    The result is not anchored; the caller adds ``^`` and ``$``.
    """
    chunks: list[bytes] = []

    if b"/" not in pattern:
        # No slash at all: the pattern may match at any directory depth.
        chunks.append(b"(?:.*/)??")
    elif pattern.startswith(b"/"):
        # A leading slash pins the pattern to the root; the slash is dropped.
        pattern = pattern[1:]

    length = len(pattern)
    pos = 0
    while pos < length:
        char = pattern[pos : pos + 1]
        pos += 1

        if char == b"*":
            # Slicing past the end yields b"", so no explicit bounds check.
            if pattern[pos : pos + 1] != b"*":
                # Single "*": any run of characters except "/".
                chunks.append(b"[^/]*")
                continue
            pos += 1
            if pattern[pos : pos + 1] == b"/":
                # "**/": zero or more whole directories.
                chunks.append(b"(?:.*/)??")
                pos += 1
            else:
                # "**" at the end or in the middle: match anything.
                chunks.append(b".*")
        elif char == b"?":
            chunks.append(b"[^/]")
        elif char == b"[":
            # Find the closing "]". A "!" right after "[" and a "]" in first
            # position belong to the class and cannot close it.
            close = pos
            if pattern[close : close + 1] == b"!":
                close += 1
            if pattern[close : close + 1] == b"]":
                close += 1
            close = pattern.find(b"]", close)
            if close == -1:
                # Unterminated class: treat "[" as a literal character.
                chunks.append(b"\\[")
            else:
                members = pattern[pos:close].replace(b"\\", b"\\\\")
                pos = close + 1
                if members.startswith(b"!"):
                    members = b"^" + members[1:]
                elif members.startswith(b"^"):
                    members = b"\\" + members
                chunks.append(b"[" + members + b"]")
        else:
            chunks.append(re.escape(char))

    return b"".join(chunks)
class Pattern:
    """A single gitattributes pattern."""

    def __init__(self, pattern: bytes):
        """Store the raw pattern and compile it immediately.

        Args:
          pattern: Attribute pattern as bytes
        """
        self.pattern = pattern
        self._regex: re.Pattern[bytes] | None = None
        self._compile()

    def _compile(self) -> None:
        """Translate the pattern and store it as an anchored compiled regex."""
        self._regex = re.compile(b"^" + _translate_pattern(self.pattern) + b"$")

    def match(self, path: bytes) -> bool:
        """Check if path matches this pattern.

        Args:
          path: Path to check (relative to repository root, using / separators);
            a single leading ``/`` is ignored

        Returns:
          True if path matches this pattern
        """
        relative = path.removeprefix(b"/")

        compiled = self._regex
        assert compiled is not None  # Always set by _compile()
        return compiled.match(relative) is not None
def match_path(
    patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]], path: bytes
) -> dict[bytes, AttributeValue]:
    """Collect the attributes that apply to a path.

    Patterns are evaluated in order, so a later matching pattern overrides
    what an earlier one set. A value of ``None`` (unspecified) removes the
    attribute from the result instead of storing it.

    Args:
      patterns: List of (Pattern, attributes) tuples
      path: Path to match (relative to repository root)

    Returns:
      Dictionary of attributes that apply to this path
    """
    resolved: dict[bytes, AttributeValue] = {}

    for matcher, attr_map in patterns:
        if not matcher.match(path):
            continue
        for key, setting in attr_map.items():
            if setting is not None:
                resolved[key] = setting
            else:
                # Unspecified: forget whatever an earlier pattern set.
                resolved.pop(key, None)

    return resolved
def parse_gitattributes_file(
    filename: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Parse a gitattributes file and return compiled patterns.

    Args:
      filename: Path to the .gitattributes file

    Returns:
      List of (Pattern, attributes) tuples, in file order

    Raises:
      OSError: If the file cannot be opened
    """
    # open() accepts str and bytes paths alike. The name is deliberately not
    # re-encoded as UTF-8 first: that fails for str paths carrying
    # surrogate-escaped bytes and is not needed for anything else.
    with open(filename, "rb") as f:
        return [
            (Pattern(pattern_bytes), attrs)
            for pattern_bytes, attrs in parse_git_attributes(f)
        ]
def read_gitattributes(
    path: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Read .gitattributes from a directory.

    Args:
      path: Directory path to check for .gitattributes

    Returns:
      List of (Pattern, attributes) tuples; empty if the directory has no
      .gitattributes file
    """
    # os.fsdecode uses the filesystem encoding and its error handler, so byte
    # paths that are not valid UTF-8 no longer raise UnicodeDecodeError here.
    # str paths pass through unchanged.
    directory = os.fsdecode(path)

    gitattributes_path = os.path.join(directory, ".gitattributes")
    if os.path.exists(gitattributes_path):
        return parse_gitattributes_file(gitattributes_path)

    return []
class GitAttributes:
    """A collection of gitattributes patterns that can match paths."""

    def __init__(
        self,
        patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] | None = None,
    ):
        """Initialize GitAttributes.

        Args:
          patterns: Optional list of (Pattern, attributes) tuples. A non-empty
            list is stored as-is rather than copied; ``None`` or an empty list
            results in a fresh empty list.
        """
        self._patterns = patterns or []

    def match_path(self, path: bytes) -> dict[bytes, AttributeValue]:
        """Get attributes for a path by matching against patterns.

        Args:
          path: Path to match (relative to repository root)

        Returns:
          Dictionary of attributes that apply to this path
        """
        return match_path(self._patterns, path)

    def add_patterns(
        self, patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]]
    ) -> None:
        """Append patterns to the end of the collection.

        Args:
          patterns: List of (Pattern, attributes) tuples to add
        """
        self._patterns.extend(patterns)

    def __len__(self) -> int:
        """Return the number of patterns."""
        return len(self._patterns)

    def __iter__(self) -> Iterator[tuple["Pattern", Mapping[bytes, AttributeValue]]]:
        """Iterate over (Pattern, attributes) tuples in order."""
        return iter(self._patterns)

    @classmethod
    def from_file(cls, filename: str | bytes) -> "GitAttributes":
        """Create GitAttributes from a gitattributes file.

        Args:
          filename: Path to the .gitattributes file

        Returns:
          New GitAttributes instance
        """
        return cls(parse_gitattributes_file(filename))

    @classmethod
    def from_path(cls, path: str | bytes) -> "GitAttributes":
        """Create GitAttributes from .gitattributes in a directory.

        Args:
          path: Directory path to check for .gitattributes

        Returns:
          New GitAttributes instance
        """
        return cls(read_gitattributes(path))

    def set_attribute(self, pattern: bytes, name: bytes, value: AttributeValue) -> None:
        """Set an attribute for a pattern.

        If an entry with exactly the same raw pattern already exists, the
        first such entry is updated in place (its position is kept).
        Otherwise a new entry is appended.

        Args:
          pattern: The file pattern
          name: Attribute name
          value: Attribute value (bytes, True, False, or None)
        """
        for index, (existing, attrs) in enumerate(self._patterns):
            if existing.pattern == pattern:
                # Work on a copy so the stored mapping, which may be
                # read-only or shared with the caller, is never mutated.
                updated = dict(attrs)
                updated[name] = value
                self._patterns[index] = (existing, updated)
                return

        self._patterns.append((Pattern(pattern), {name: value}))

    def remove_pattern(self, pattern: bytes) -> None:
        """Remove every entry whose raw pattern equals the given one.

        Args:
          pattern: The file pattern to remove
        """
        self._patterns = [
            (p, attrs) for p, attrs in self._patterns if p.pattern != pattern
        ]

    def to_bytes(self) -> bytes:
        """Convert GitAttributes to bytes format suitable for writing to file.

        Attributes are written sorted by name. Entries without any attribute
        are left out.

        Returns:
          Bytes representation of the gitattributes file; empty if there is
          nothing to write, otherwise newline-terminated
        """
        lines = []
        for pattern_obj, attrs in self._patterns:
            attr_strs = []

            for name, value in sorted(attrs.items()):
                # Identity checks keep True/False/None apart from bytes values.
                if value is True:
                    attr_strs.append(name)
                elif value is False:
                    attr_strs.append(b"-" + name)
                elif value is None:
                    attr_strs.append(b"!" + name)
                else:
                    # value is bytes
                    attr_strs.append(name + b"=" + value)

            if attr_strs:
                lines.append(pattern_obj.pattern + b" " + b" ".join(attr_strs))

        return b"\n".join(lines) + b"\n" if lines else b""

    def write_to_file(self, filename: str | bytes) -> None:
        """Write GitAttributes to a file, replacing any existing content.

        Args:
          filename: Path to write the .gitattributes file

        Raises:
          OSError: If the file cannot be opened for writing
        """
        content = self.to_bytes()
        # open() takes str and bytes paths directly. Forcing a UTF-8 encode
        # would fail for str paths carrying surrogate-escaped bytes.
        with open(filename, "wb") as f:
            f.write(content)