1"""
2:func:`~pandas.eval` source string parsing functions
3"""
4from __future__ import annotations
5
6from io import StringIO
7from keyword import iskeyword
8import token
9import tokenize
10from typing import (
11 Hashable,
12 Iterator,
13)
14
15# A token value Python's tokenizer probably will never use.
16BACKTICK_QUOTED_STRING = 100
17
18
19def create_valid_python_identifier(name: str) -> str:
20 """
21 Create valid Python identifiers from any string.
22
23 Check if name contains any special characters. If it contains any
24 special characters, the special characters will be replaced by
25 a special string and a prefix is added.
26
27 Raises
28 ------
29 SyntaxError
30 If the returned name is not a Python valid identifier, raise an exception.
31 This can happen if there is a hashtag in the name, as the tokenizer will
32 than terminate and not find the backtick.
33 But also for characters that fall out of the range of (U+0001..U+007F).
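
    Examples
    --------
    A sketch of the expected mangling; the replacement strings come from the
    mappings built in the function body below (e.g. ``'`` maps to
    ``_SINGLEQUOTE_`` and a space maps to ``_``):

    >>> create_valid_python_identifier("already_valid")
    'already_valid'
    >>> create_valid_python_identifier("1st place")
    'BACKTICK_QUOTED_STRING_1st_place'
    >>> create_valid_python_identifier("it's")
    'BACKTICK_QUOTED_STRING_it_SINGLEQUOTE_s'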
    """
    if name.isidentifier() and not iskeyword(name):
        return name

    # Create a dict with the special characters and their replacement string.
    # EXACT_TOKEN_TYPES contains these special characters;
    # token.tok_name contains a readable description of the replacement string.
    special_characters_replacements = {
        char: f"_{token.tok_name[tokval]}_"
        for char, tokval in tokenize.EXACT_TOKEN_TYPES.items()
    }
    special_characters_replacements.update(
        {
            " ": "_",
            "?": "_QUESTIONMARK_",
            "!": "_EXCLAMATIONMARK_",
            "$": "_DOLLARSIGN_",
            "€": "_EUROSIGN_",
            "°": "_DEGREESIGN_",
            # Including quotes works, but there are exceptions.
            "'": "_SINGLEQUOTE_",
            '"': "_DOUBLEQUOTE_",
            # Currently not possible. Terminates parser and won't find backtick.
            # "#": "_HASH_",
        }
    )

    name = "".join([special_characters_replacements.get(char, char) for char in name])
    name = f"BACKTICK_QUOTED_STRING_{name}"

    if not name.isidentifier():
        raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.")

    return name


def clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Clean up a column name if surrounded by backticks.

    Backtick quoted strings are indicated by a certain toknum value. If a
    token is such a backtick quoted string, it is processed by
    :func:`create_valid_python_identifier` so that the parser can find this
    string when the query is executed.
    In this case the token gets the NAME toknum.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tok : Tuple[int, str]
        Either the input token or the replacement values
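
    Examples
    --------
    A sketch of both branches; in CPython ``tokenize.NAME`` is ``1``, and
    ``BACKTICK_QUOTED_STRING`` is defined above as ``100``:

    >>> clean_backtick_quoted_toks((BACKTICK_QUOTED_STRING, "col name"))
    (1, 'BACKTICK_QUOTED_STRING_col_name')
    >>> clean_backtick_quoted_toks((tokenize.NAME, "col_name"))
    (1, 'col_name')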
    """
    toknum, tokval = tok
    if toknum == BACKTICK_QUOTED_STRING:
        return tokenize.NAME, create_valid_python_identifier(tokval)
    return toknum, tokval


def clean_column_name(name: Hashable) -> Hashable:
    """
    Function to emulate the cleaning of a backtick quoted name.

    The purpose of this function is to see what happens to the name of an
    identifier when it goes through the process of being parsed as Python
    code inside a backtick quoted string and is then cleaned (i.e. has its
    special characters removed).

    Parameters
    ----------
    name : hashable
        Name to be cleaned.

    Returns
    -------
    name : hashable
        Returns the name after tokenizing and cleaning.

    Notes
    -----
    In some cases, a name cannot be converted to a valid Python identifier.
    In those cases :func:`tokenize_string` raises a SyntaxError and the name
    is returned unmodified.

    If such a name is used in the query string (which makes the query call
    impossible), an error is raised by :func:`tokenize_backtick_quoted_string`
    instead; that error is not caught and propagates to the user level.
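
    Examples
    --------
    A sketch of the expected round trip (the exact handling of backticks by
    the tokenizer can differ between Python versions):

    >>> clean_column_name("col name")
    'BACKTICK_QUOTED_STRING_col_name'
    >>> clean_column_name("col_name")
    'col_name'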
    """
    try:
        tokenized = tokenize_string(f"`{name}`")
        tokval = next(tokenized)[1]
        return create_valid_python_identifier(tokval)
    except SyntaxError:
        return name


def tokenize_backtick_quoted_string(
    token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
) -> tuple[int, str]:
    """
    Create a token from a backtick quoted string.

    Moves the token_generator forward to just after the next backtick.

    Parameters
    ----------
    token_generator : Iterator[tokenize.TokenInfo]
        The generator that yields the tokens of the source string (Tuple[int, str]).
        The generator is at the first token after the opening backtick (`).

    source : str
        The Python source code string.

    string_start : int
        The start of the backtick quoted string inside the source string.

    Returns
    -------
    tok : Tuple[int, str]
        The token that represents the backtick quoted string.
        The integer is equal to BACKTICK_QUOTED_STRING (100).
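
    Examples
    --------
    A minimal sketch: advance a raw token generator past the opening backtick
    by hand, then let this function collect the quoted name (the exact
    handling of backticks by the tokenizer can differ between Python
    versions):

    >>> from io import StringIO
    >>> source = "`col name` + 1"
    >>> gen = tokenize.generate_tokens(StringIO(source).readline)
    >>> next(gen)[1]  # Consume the opening backtick.
    '`'
    >>> tokenize_backtick_quoted_string(gen, source, string_start=1)
    (100, 'col name')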
    """
    # Scan forward until the closing backtick; its column marks the end of
    # the backtick quoted string inside the source.
    for _, tokval, start, _, _ in token_generator:
        if tokval == "`":
            string_end = start[1]
            break

    return BACKTICK_QUOTED_STRING, source[string_start:string_end]


def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
    """
    Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        The Python source code string.

    Returns
    -------
    tok_generator : Iterator[Tuple[int, str]]
        An iterator yielding all tokens with only toknum and tokval
        (Tuple[int, str]).
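
    Examples
    --------
    A sketch of the expected behavior: the backtick quoted name is collapsed
    into a single token whose toknum is BACKTICK_QUOTED_STRING (100):

    >>> next(tokenize_string("`col name` + 1"))
    (100, 'col name')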
    """
    line_reader = StringIO(source).readline
    token_generator = tokenize.generate_tokens(line_reader)

    # Loop over all tokens until a backtick (`) is found.
    # Then, take all tokens until the next backtick to form a backtick
    # quoted string.
    for toknum, tokval, start, _, _ in token_generator:
        if tokval == "`":
            try:
                yield tokenize_backtick_quoted_string(
                    token_generator, source, string_start=start[1] + 1
                )
            except Exception as err:
                raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err
        else:
            yield toknum, tokval