Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/asttokens/line_numbers.py: 33%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

30 statements  

1# Copyright 2016 Grist Labs, Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15import bisect 

16import re 

17from typing import Dict, List, Tuple 

18 

19_line_start_re = re.compile(r'^', re.M) 

20 

class LineNumbers(object):
    """
    Class to convert between character offsets in a text string, and pairs (line, column) of
    1-based line and 0-based column numbers, as used by tokens and AST nodes.

    This class expects unicode for input and stores positions in unicode. It also supports
    translating utf8 columns, which are used by ast parsing, into unicode columns (see
    `from_utf8_col`).
    """

    def __init__(self, text):
        # type: (str) -> None
        """
        Indexes `text` by recording the character offset at which each of its lines starts.
        """
        # A list of character offsets of each line's first character.
        self._line_offsets = [m.start(0) for m in _line_start_re.finditer(text)]
        self._text = text
        self._text_len = len(text)
        # Maps a 1-based line number to a list with one entry per utf8 byte of that line, each
        # entry being the 0-based character column the byte belongs to (plus a final entry for
        # the end of the line). Filled lazily by from_utf8_col().
        self._utf8_offset_cache = {}    # type: Dict[int, List[int]]

    def from_utf8_col(self, line, utf8_column):
        # type: (int, int) -> int
        """
        Given a 1-based line number and 0-based utf8 column, returns a 0-based unicode column.

        `utf8_column` is clamped to the line's extent: negative values map to column 0, and
        values past the end of the line map to the line's length in characters. A `line`
        outside the text is treated like an empty line and yields column 0, in keeping with
        the clamping done by `line_to_offset` and `offset_to_line`.
        """
        line_count = len(self._line_offsets)
        if line < 1 or line > line_count:
            # Without this guard, a line past the end would raise IndexError and a negative
            # line would silently wrap around (via negative indexing) to an unrelated line.
            # Nothing is cached for such lines, so bogus keys cannot grow the cache.
            return 0

        offsets = self._utf8_offset_cache.get(line)
        if offsets is None:
            start_offset = self._line_offsets[line - 1]
            # The last line has no following line start; it extends to the end of the text.
            end_offset = self._line_offsets[line] if line < line_count else self._text_len
            line_text = self._text[start_offset:end_offset]

            # Repeat each character's column once per byte of its utf8 encoding, so that the
            # list can be indexed directly by utf8 column.
            offsets = [i for i, c in enumerate(line_text) for _ in c.encode('utf8')]
            # Sentinel entry, so that a utf8 column at or past the end of the line maps to
            # the end of the line.
            offsets.append(len(line_text))
            self._utf8_offset_cache[line] = offsets

        return offsets[max(0, min(len(offsets) - 1, utf8_column))]

    def line_to_offset(self, line, column):
        # type: (int, int) -> int
        """
        Converts 1-based line number and 0-based column to 0-based character offset into text.

        Out-of-range input is clamped rather than rejected: a line past the end maps to the
        length of the text, a line before the first maps to 0, a negative column counts as 0,
        and the result never exceeds the length of the text.
        """
        line -= 1    # Switch to a 0-based index into self._line_offsets.
        if line >= len(self._line_offsets):
            return self._text_len
        elif line < 0:
            return 0
        else:
            return min(self._line_offsets[line] + max(0, column), self._text_len)

    def offset_to_line(self, offset):
        # type: (int) -> Tuple[int, int]
        """
        Converts 0-based character offset to pair (line, col) of 1-based line and 0-based
        column numbers. The offset is first clamped to the range [0, length of text].
        """
        offset = max(0, min(self._text_len, offset))
        # bisect_right gives the number of line starts that are <= offset; the last of those
        # is the start of the line containing offset.
        line_index = bisect.bisect_right(self._line_offsets, offset) - 1
        return (line_index + 1, offset - self._line_offsets[line_index])

75 

76