Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/attrs.py: 21%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# attrs.py -- Git attributes for dulwich
2# Copyright (C) 2019-2020 Collabora Ltd
3# Copyright (C) 2019-2020 Andrej Shadura <andrew.shadura@collabora.co.uk>
4#
5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6# General Public License as published by the Free Software Foundation; version 2.0
7# or (at your option) any later version. You can redistribute it and/or
8# modify it under the terms of either of these two licenses.
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# You should have received a copy of the licenses; if not, see
17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19# License, Version 2.0.
20#
22"""Parse .gitattributes file."""
# Public names exported by this module.
__all__ = [
    "AttributeValue",
    "GitAttributes",
    "Pattern",
    "match_path",
    "parse_git_attributes",
    "parse_gitattributes_file",
    "read_gitattributes",
]
34import os
35import re
36from collections.abc import Generator, Iterator, Mapping, Sequence
37from typing import IO
# Value of a single attribute as produced by _parse_attr():
#   bytes -> ``attr=value``, True -> ``attr``, False -> ``-attr``, None -> ``!attr``.
AttributeValue = bytes | bool | None
42def _parse_attr(attr: bytes) -> tuple[bytes, AttributeValue]:
43 """Parse a git attribute into its value.
45 >>> _parse_attr(b'attr')
46 (b'attr', True)
47 >>> _parse_attr(b'-attr')
48 (b'attr', False)
49 >>> _parse_attr(b'!attr')
50 (b'attr', None)
51 >>> _parse_attr(b'attr=text')
52 (b'attr', b'text')
53 """
54 if attr.startswith(b"!"):
55 return attr[1:], None
56 if attr.startswith(b"-"):
57 return attr[1:], False
58 if b"=" not in attr:
59 return attr, True
60 # Split only on first = to handle values with = in them
61 name, _, value = attr.partition(b"=")
62 return name, value
def parse_git_attributes(
    f: IO[bytes],
) -> Generator[tuple[bytes, Mapping[bytes, AttributeValue]], None, None]:
    """Parse the contents of a gitattributes file.

    Each line is stripped of surrounding whitespace; blank lines and lines
    starting with ``#`` are skipped. Every remaining line is split on
    whitespace into a pattern followed by zero or more attribute tokens.

    Args:
      f: File-like object (or any iterable of byte lines) to read from
    Returns:
      Generator of (pattern, attributes) tuples, in the order in which they
      are encountered. Both the attribute names and their textual values
      are bytes.

    >>> from io import BytesIO
    >>> list(parse_git_attributes(BytesIO(b'''*.tar.* filter=lfs diff=lfs merge=lfs -text
    ...
    ... # store signatures in Git
    ... *.tar.*.asc -filter -diff merge=binary -text
    ...
    ... # store .dsc verbatim
    ... *.dsc -filter !diff merge=binary !text
    ... '''))) #doctest: +NORMALIZE_WHITESPACE
    [(b'*.tar.*', {b'filter': b'lfs', b'diff': b'lfs', b'merge': b'lfs', b'text': False}),
     (b'*.tar.*.asc', {b'filter': False, b'diff': False, b'merge': b'binary', b'text': False}),
     (b'*.dsc', {b'filter': False, b'diff': None, b'merge': b'binary', b'text': None})]
    """
    for raw_line in f:
        line = raw_line.strip()

        # Blank lines are only there for readability; "#" starts a comment.
        if not line or line.startswith(b"#"):
            continue

        pattern, *attrs = line.split()
        # If an attribute is repeated on a line, the last occurrence wins.
        yield pattern, dict(_parse_attr(a) for a in attrs)
103def _translate_pattern(pattern: bytes) -> bytes:
104 """Translate a gitattributes pattern to a regular expression.
106 Similar to gitignore patterns, but simpler as gitattributes doesn't support
107 all the same features (e.g., no directory-only patterns with trailing /).
108 """
109 res = b""
110 i = 0
111 n = len(pattern)
113 # If pattern doesn't contain /, it can match at any level
114 if b"/" not in pattern:
115 res = b"(?:.*/)??"
116 elif pattern.startswith(b"/"):
117 # Leading / means root of repository
118 pattern = pattern[1:]
119 n = len(pattern)
121 while i < n:
122 c = pattern[i : i + 1]
123 i += 1
125 if c == b"*":
126 if i < n and pattern[i : i + 1] == b"*":
127 # Double asterisk
128 i += 1
129 if i < n and pattern[i : i + 1] == b"/":
130 # **/ - match zero or more directories
131 res += b"(?:.*/)??"
132 i += 1
133 elif i == n:
134 # ** at end - match everything
135 res += b".*"
136 else:
137 # ** in middle
138 res += b".*"
139 else:
140 # Single * - match any character except /
141 res += b"[^/]*"
142 elif c == b"?":
143 res += b"[^/]"
144 elif c == b"[":
145 # Character class
146 j = i
147 if j < n and pattern[j : j + 1] == b"!":
148 j += 1
149 if j < n and pattern[j : j + 1] == b"]":
150 j += 1
151 while j < n and pattern[j : j + 1] != b"]":
152 j += 1
153 if j >= n:
154 res += b"\\["
155 else:
156 stuff = pattern[i:j].replace(b"\\", b"\\\\")
157 i = j + 1
158 if stuff.startswith(b"!"):
159 stuff = b"^" + stuff[1:]
160 elif stuff.startswith(b"^"):
161 stuff = b"\\" + stuff
162 res += b"[" + stuff + b"]"
163 else:
164 res += re.escape(c)
166 return res
class Pattern:
    """A single gitattributes pattern."""

    def __init__(self, pattern: bytes):
        """Initialize Pattern.

        Args:
          pattern: Attribute pattern as bytes
        """
        self.pattern = pattern
        self._regex: re.Pattern[bytes] | None = None
        self._compile()

    def _compile(self) -> None:
        """Compile the pattern to an anchored regular expression."""
        regex_pattern = _translate_pattern(self.pattern)
        # Anchor with \Z rather than "$": "$" also matches just before a
        # trailing newline, so b"a.txt\n" would wrongly match "*.txt".
        # DOTALL lets "." (used for "**" and leading directories) match a
        # newline inside a path component.
        self._regex = re.compile(b"^" + regex_pattern + rb"\Z", re.DOTALL)

    def match(self, path: bytes) -> bool:
        """Check if path matches this pattern.

        Args:
          path: Path to check (relative to repository root, using / separators)

        Returns:
          True if path matches this pattern
        """
        # Normalize path: a single leading slash is ignored.
        if path.startswith(b"/"):
            path = path[1:]

        assert self._regex is not None  # Always set by _compile()
        return bool(self._regex.match(path))
def match_path(
    patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]], path: bytes
) -> dict[bytes, AttributeValue]:
    """Collect the attributes that apply to a path.

    Patterns are evaluated in order, so an entry later in the sequence
    overrides what an earlier one set for the same attribute.

    Args:
      patterns: List of (Pattern, attributes) tuples
      path: Path to match (relative to repository root)

    Returns:
      Dictionary of attributes that apply to this path
    """
    resolved: dict[bytes, AttributeValue] = {}

    for candidate, candidate_attrs in patterns:
        if not candidate.match(path):
            continue
        for key, setting in candidate_attrs.items():
            if setting is not None:
                resolved[key] = setting
            else:
                # None means "unspecified": forget any earlier setting.
                resolved.pop(key, None)

    return resolved
def parse_gitattributes_file(
    filename: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Parse a gitattributes file and return compiled patterns.

    Args:
      filename: Path to the .gitattributes file

    Returns:
      List of (Pattern, attributes) tuples, in file order
    """
    # open() accepts str and bytes paths alike. Forcing str through UTF-8
    # would fail on surrogate-escaped names and bypass the filesystem
    # encoding, so the name is passed through unchanged.
    with open(filename, "rb") as f:
        # Build the list inside the with-block: the parser reads lazily.
        return [
            (Pattern(pattern_bytes), attrs)
            for pattern_bytes, attrs in parse_git_attributes(f)
        ]
def read_gitattributes(
    path: str | bytes,
) -> list[tuple[Pattern, Mapping[bytes, AttributeValue]]]:
    """Read .gitattributes from a directory.

    Args:
      path: Directory path to check for .gitattributes

    Returns:
      List of (Pattern, attributes) tuples; empty if the directory has no
      .gitattributes file
    """
    # fsdecode() uses the filesystem encoding and error handler, so byte
    # paths that are not valid UTF-8 no longer raise UnicodeDecodeError.
    gitattributes_path = os.path.join(os.fsdecode(path), ".gitattributes")
    if not os.path.exists(gitattributes_path):
        return []
    return parse_gitattributes_file(gitattributes_path)
class GitAttributes:
    """A collection of gitattributes patterns that can match paths."""

    def __init__(
        self,
        patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] | None = None,
    ):
        """Initialize GitAttributes.

        Args:
          patterns: Optional list of (Pattern, attributes) tuples
        """
        # Copy the list so that later add_patterns()/set_attribute() calls
        # never modify the list object owned by the caller.
        self._patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] = (
            list(patterns) if patterns is not None else []
        )

    def match_path(self, path: bytes) -> dict[bytes, AttributeValue]:
        """Get attributes for a path by matching against patterns.

        Args:
          path: Path to match (relative to repository root)

        Returns:
          Dictionary of attributes that apply to this path
        """
        return match_path(self._patterns, path)

    def add_patterns(
        self, patterns: Sequence[tuple[Pattern, Mapping[bytes, AttributeValue]]]
    ) -> None:
        """Add patterns to the collection.

        Args:
          patterns: List of (Pattern, attributes) tuples to add
        """
        self._patterns.extend(patterns)

    def __len__(self) -> int:
        """Return the number of patterns."""
        return len(self._patterns)

    def __iter__(self) -> Iterator[tuple["Pattern", Mapping[bytes, AttributeValue]]]:
        """Iterate over patterns."""
        return iter(self._patterns)

    @classmethod
    def from_file(cls, filename: str | bytes) -> "GitAttributes":
        """Create GitAttributes from a gitattributes file.

        Args:
          filename: Path to the .gitattributes file

        Returns:
          New GitAttributes instance
        """
        return cls(parse_gitattributes_file(filename))

    @classmethod
    def from_path(cls, path: str | bytes) -> "GitAttributes":
        """Create GitAttributes from .gitattributes in a directory.

        Args:
          path: Directory path to check for .gitattributes

        Returns:
          New GitAttributes instance
        """
        return cls(read_gitattributes(path))

    def set_attribute(self, pattern: bytes, name: bytes, value: AttributeValue) -> None:
        """Set an attribute for a pattern.

        The first entry whose pattern text equals ``pattern`` is updated;
        if there is none, a new entry is appended.

        Args:
          pattern: The file pattern
          name: Attribute name
          value: Attribute value (bytes, True, False, or None)
        """
        for index, (existing, attrs) in enumerate(self._patterns):
            if existing.pattern == pattern:
                # Work on a copy: the stored mapping may be read-only or
                # shared with whoever supplied it.
                updated = dict(attrs)
                updated[name] = value
                self._patterns[index] = (existing, updated)
                return

        self._patterns.append((Pattern(pattern), {name: value}))

    def remove_pattern(self, pattern: bytes) -> None:
        """Remove all attributes for a pattern.

        Args:
          pattern: The file pattern to remove
        """
        self._patterns = [
            (p, attrs) for p, attrs in self._patterns if p.pattern != pattern
        ]

    def to_bytes(self) -> bytes:
        """Convert GitAttributes to bytes format suitable for writing to file.

        Attributes are written sorted by name. Entries without any
        attributes are omitted.

        Returns:
          Bytes representation of the gitattributes file
        """
        lines = []
        for pattern_obj, attrs in self._patterns:
            attr_strs = []
            for name, value in sorted(attrs.items()):
                if value is True:
                    attr_strs.append(name)
                elif value is False:
                    attr_strs.append(b"-" + name)
                elif value is None:
                    attr_strs.append(b"!" + name)
                else:
                    # value is bytes
                    attr_strs.append(name + b"=" + value)

            if attr_strs:
                lines.append(pattern_obj.pattern + b" " + b" ".join(attr_strs))

        return b"\n".join(lines) + b"\n" if lines else b""

    def write_to_file(self, filename: str | bytes) -> None:
        """Write GitAttributes to a file.

        Args:
          filename: Path to write the .gitattributes file
        """
        # open() handles both str and bytes paths; no manual encoding needed.
        with open(filename, "wb") as f:
            f.write(self.to_bytes())