Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_harwell_boeing/_fortran_format_parser.py: 25%
163 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1"""
Preliminary module to handle Fortran formats for IO. Do not use this outside
scipy.sparse io for now, until the API is deemed reasonable.

The *Format classes handle conversion between Fortran and Python format, and
FortranFormatParser can create *Format instances from raw Fortran format
strings (e.g. '(3I4)', '(10I3)', etc...)
8"""
9import re
11import numpy as np
14__all__ = ["BadFortranFormat", "FortranFormatParser", "IntFormat", "ExpFormat"]
# Regular expressions for the lexical tokens of a Fortran format string,
# keyed by token type. Tokenizer tries them in this insertion order.
TOKENS = dict(
    LPAR=r"\(",      # opening parenthesis
    RPAR=r"\)",      # closing parenthesis
    INT_ID=r"I",     # integer edit descriptor letter
    EXP_ID=r"E",     # exponential edit descriptor letter
    INT=r"\d+",      # unsigned run of decimal digits
    DOT=r"\.",       # literal dot
)
class BadFortranFormat(SyntaxError):
    """Error signalling a Fortran format string that could not be parsed.

    It is a plain ``SyntaxError`` subclass and adds no behaviour of its own.
    """
def number_digits(n):
    """Return the number of decimal digits needed to write ``abs(n)``.

    Parameters
    ----------
    n : int or float
        Number whose magnitude is measured. The sign is ignored.

    Returns
    -------
    ndigits : int
        For integers (Python ``int`` or NumPy integer scalars) the exact
        length of the decimal representation of ``abs(n)``; zero counts as
        one digit. For other numbers, ``floor(log10(abs(n))) + 1``, with zero
        again reported as one digit.
    """
    if isinstance(n, (int, np.integer)):
        # Count digits on the exact integer: log10 in floating point rounds
        # values such as 10**15 - 1 up to the next power of ten, and
        # log10(0) is -inf, which cannot be converted to int.
        return len(str(abs(int(n))))
    if n == 0:
        return 1
    return int(np.floor(np.log10(np.abs(n))) + 1)
class IntFormat:
    """Fortran integer edit descriptor of the form ``[repeat]I<width>[.<min>]``.

    The constructor stores ``width``, ``min`` and ``repeat`` unchanged as
    attributes of the same names.
    """

    def __init__(self, width, min=None, repeat=None):
        self.width, self.min, self.repeat = width, min, repeat

    @classmethod
    def from_number(cls, n, min=None):
        """Build a "reasonable" IntFormat able to hold every integer between
        0 and n when n > 0, or between -n and n when n < 0.

        Parameters
        ----------
        n : int
            largest number that must be representable
        min : int
            minimum number of characters to use for the format

        Returns
        -------
        res : IntFormat
            IntFormat instance with reasonable (see Notes) computed width

        Notes
        -----
        "Reasonable" means the shortest string length that loses no
        precision. For instance ``IntFormat.from_number(1)`` has width 2:
        one character for the digit and one extra character of padding.
        """
        # One column per digit plus one padding column; a negative bound
        # needs a further column for the sign.
        extra_columns = 2 if n < 0 else 1
        field_width = number_digits(n) + extra_columns
        # As many fields as fit on an 80-character line.
        fields_per_line = 80 // field_width
        return cls(field_width, min, repeat=fields_per_line)

    @property
    def fortran_format(self):
        """Parenthesised Fortran format string, e.g. ``(3I4)`` or ``(I10.5)``."""
        pieces = ["("]
        if self.repeat:
            pieces.append("%d" % self.repeat)
        pieces.append("I%d" % self.width)
        if self.min:
            pieces.append(".%d" % self.min)
        pieces.append(")")
        return "".join(pieces)

    def __repr__(self):
        # Same text as fortran_format, prefixed with the class name.
        return "IntFormat" + self.fortran_format

    @property
    def python_format(self):
        """printf-style format string giving the same field width."""
        return "%{}d".format(self.width)
class ExpFormat:
    """Fortran exponential edit descriptor
    ``[repeat]E<width>.<significand>[E<min>]``.
    """

    @classmethod
    def from_number(cls, n, min=None):
        """Given a float number, returns a "reasonable" ExpFormat instance to
        represent any number between -n and n.

        Parameters
        ----------
        n : float
            max number one wants to be able to represent. May be a plain
            Python float or a NumPy floating scalar; the floating-point type
            of `n` determines the precision used.
        min : int
            minimum number of characters to use for the format

        Returns
        -------
        res : ExpFormat
            ExpFormat instance with reasonable (see Notes) computed width

        Notes
        -----
        Reasonable should be understood as the minimal string length necessary
        to avoid losing precision.
        """
        # Plain Python floats have no ``dtype`` attribute; going through
        # ``np.asarray`` maps them to float64 while leaving NumPy scalars
        # and arrays with the dtype they already carry.
        finfo = np.finfo(np.asarray(n).dtype)
        # Number of digits for fractional part
        n_prec = finfo.precision + 1
        # Number of digits for exponential part: exact decimal length of the
        # larger of |maxexp| and |minexp| (both are integers).
        largest_exp = max(abs(int(finfo.maxexp)), abs(int(finfo.minexp)))
        n_exp = len(str(largest_exp))
        # leading digit + "." + fractional digits + "E" + exponent digits
        # + sign of exponent
        width = 1 + 1 + n_prec + 1 + n_exp + 1
        if n < 0:
            # one more column for the sign of the number itself
            width += 1
        # As many fields as fit on an 80-character line.
        repeat = 80 // width
        return cls(width, n_prec, min, repeat=repeat)

    def __init__(self, width, significand, min=None, repeat=None):
        """\
        Parameters
        ----------
        width : int
            number of characters taken by the string (includes space).
        significand : int
            number written after the dot in the format (``E<width>.<significand>``).
        min : int, optional
            number written after the trailing ``E`` of the format, if any.
        repeat : int, optional
            repeat count written in front of the descriptor, if any.
        """
        self.width = width
        self.significand = significand
        self.repeat = repeat
        self.min = min

    @property
    def fortran_format(self):
        """Parenthesised Fortran format string, e.g. ``(3E25.16)``."""
        pieces = ["("]
        if self.repeat:
            pieces.append("%d" % self.repeat)
        pieces.append("E%d.%d" % (self.width, self.significand))
        if self.min:
            pieces.append("E%d" % self.min)
        pieces.append(")")
        return "".join(pieces)

    def __repr__(self):
        # Same text as fortran_format, prefixed with the class name.
        return "ExpFormat" + self.fortran_format

    @property
    def python_format(self):
        """printf-style format string; its field is one character narrower
        than ``width``."""
        return "%{}.{}E".format(self.width - 1, self.significand)
class Token:
    """A lexical token: its type name, matched text and a position."""

    def __init__(self, type, value, pos):
        self.type, self.value, self.pos = type, value, pos

    def __str__(self):
        # Type in single quotes, matched text in double quotes.
        return "Token('{}', \"{}\")".format(self.type, self.value)

    def __repr__(self):
        # repr and str are deliberately the same text.
        return self.__str__()
class Tokenizer:
    """Splits a string into Token objects using the patterns in TOKENS."""

    def __init__(self):
        # Token type names and their compiled patterns, kept in parallel
        # lists that share the iteration order of TOKENS.
        self.tokens = list(TOKENS)
        self.res = [re.compile(TOKENS[name]) for name in self.tokens]

    def input(self, s):
        """Set `s` as the string to tokenize and rewind to its start."""
        self.data = s
        self.curpos = 0
        self.len = len(s)

    def next_token(self):
        """Return the next Token, or None once the input is exhausted.

        Raises
        ------
        SyntaxError
            If no pattern matches at the current position.
        """
        start = self.curpos
        if start >= self.len:
            return None

        for name, pattern in zip(self.tokens, self.res):
            found = pattern.match(self.data, start)
            if found is not None:
                # The stored position is the offset just past the match.
                self.curpos = found.end()
                return Token(name, found.group(), self.curpos)

        raise SyntaxError("Unknown character at position %d (%s)"
                          % (self.curpos, self.data[start]))
# Grammar for fortran format:
# format            : LPAR format_string RPAR
# format_string     : repeated | simple
# repeated          : repeat simple
# simple            : int_fmt | exp_fmt
# int_fmt           : INT_ID width | INT_ID width DOT min
# exp_fmt           : simple_exp_fmt | extended_exp_fmt
# simple_exp_fmt    : EXP_ID width DOT significand
# extended_exp_fmt  : EXP_ID width DOT significand EXP_ID ndigits
# repeat            : INT
# width             : INT
# min               : INT
# significand       : INT
# ndigits           : INT


# Naive fortran formatter - parser is hand-made
class FortranFormatParser:
    """Parser for Fortran format strings. The parse method returns a *Format
    instance.

    Notes
    -----
    Only ExpFormat (exponential format for floating values) and IntFormat
    (integer format) for now.
    """
    def __init__(self):
        self.tokenizer = Tokenizer()

    def parse(self, s):
        """Parse the Fortran format string `s`.

        Parameters
        ----------
        s : str
            Format string such as ``'(3I4)'`` or ``'(3E25.16)'``.

        Returns
        -------
        fmt : IntFormat or ExpFormat

        Raises
        ------
        BadFortranFormat
            If `s` cannot be tokenized or does not follow the supported
            grammar (including empty or truncated formats).
        """
        self.tokenizer.input(s)

        try:
            # next_token() returns None once the input is exhausted.
            tokens = list(iter(self.tokenizer.next_token, None))
            return self._parse_format(tokens)
        except SyntaxError as e:
            raise BadFortranFormat(str(e)) from e

    def _get_min(self, tokens):
        """Consume ``DOT INT`` from `tokens` and return the INT's text."""
        self._next(tokens, "DOT")
        return self._next(tokens, "INT").value

    def _expect(self, token, tp):
        """Raise SyntaxError unless `token` has type `tp`."""
        if token.type != tp:
            raise SyntaxError("Expected token of type %s (got '%s')"
                              % (tp, token.value))

    def _expect_end(self, tokens):
        """Raise SyntaxError if any token is left after a full descriptor."""
        if tokens:
            raise SyntaxError("Unexpected trailing '%s' in format"
                              % tokens[0].value)

    def _parse_format(self, tokens):
        """Build a *Format instance from the complete list of tokens.

        Every structural problem is reported as SyntaxError so that `parse`
        can convert it; no IndexError or ValueError escapes for truncated or
        malformed token lists.
        """
        if not tokens:
            raise SyntaxError("Empty format string")
        if tokens[0].type != "LPAR":
            raise SyntaxError("Expected left parenthesis at position "
                              "%d (got '%s')" % (0, tokens[0].value))
        if tokens[-1].type != "RPAR":
            raise SyntaxError("Expected right parenthesis at position "
                              "%d (got '%s')" % (len(tokens), tokens[-1].value))

        # Work on a copy without the enclosing parentheses; the caller's
        # list is left untouched.
        tokens = tokens[1:-1]
        if not tokens:
            raise SyntaxError("Empty format specification between parentheses")

        repeat = None
        if tokens[0].type == "INT":
            repeat = int(tokens.pop(0).value)
            if not tokens:
                raise SyntaxError("Missing format descriptor after repeat "
                                  "count %d" % repeat)

        descriptor = tokens.pop(0)
        if descriptor.type == "INT_ID":
            width = int(self._next(tokens, "INT").value)
            # Optional ".min" suffix.
            min_digits = int(self._get_min(tokens)) if tokens else None
            self._expect_end(tokens)
            return IntFormat(width, min_digits, repeat)

        if descriptor.type == "EXP_ID":
            width = int(self._next(tokens, "INT").value)
            self._next(tokens, "DOT")
            significand = int(self._next(tokens, "INT").value)
            # Optional "E<ndigits>" suffix.
            if tokens:
                self._next(tokens, "EXP_ID")
                min_digits = int(self._next(tokens, "INT").value)
            else:
                min_digits = None
            self._expect_end(tokens)
            return ExpFormat(width, significand, min_digits, repeat)

        raise SyntaxError("Invalid formater type %s" % descriptor.value)

    def _next(self, tokens, tp):
        """Pop and return the first token, requiring it to have type `tp`.

        Raises SyntaxError if `tokens` is empty or the type does not match.
        """
        if not tokens:
            raise SyntaxError("Unexpected end of format (expected %s)" % tp)
        token = tokens.pop(0)
        self._expect(token, tp)
        return token