Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_harwell_boeing/_fortran_format_parser.py: 25%

163 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1""" 

2Preliminary module to handle Fortran formats for IO. Do not use this outside 

3scipy.sparse io for now, until the API is deemed reasonable. 

4 

5The *Format classes handle conversion between Fortran and Python format, and 

6FortranFormatParser can create *Format instances from raw Fortran format 

7strings (e.g. '(3I4)', '(10I3)', etc...) 

8""" 

9import re 

10 

11import numpy as np 

12 

13 

14__all__ = ["BadFortranFormat", "FortranFormatParser", "IntFormat", "ExpFormat"] 

15 

16 

# Regular expressions for the lexical tokens of a Fortran format string,
# keyed by token type name.  Tokenizer tries the patterns in insertion order.
TOKENS = dict(
    LPAR=r"\(",    # opening parenthesis
    RPAR=r"\)",    # closing parenthesis
    INT_ID=r"I",   # integer edit descriptor letter
    EXP_ID=r"E",   # exponential edit descriptor letter
    INT=r"\d+",    # unsigned run of decimal digits
    DOT=r"\.",     # literal dot separating width from the digit count
)

25 

26 

class BadFortranFormat(SyntaxError):
    """Error type for a Fortran format string that cannot be parsed."""

29 

30 

def number_digits(n):
    """Return the number of decimal digits in the integer part of ``n``.

    Parameters
    ----------
    n : int or float
        Number to measure; its sign is ignored.

    Returns
    -------
    ndigits : int
        ``floor(log10(abs(n))) + 1``, or 1 when ``n`` is zero.
    """
    # log10(0) is -inf, which cannot be converted to int; "0" is one digit.
    if n == 0:
        return 1
    return int(np.floor(np.log10(np.abs(n))) + 1)

33 

34 

class IntFormat:
    """Integer format: optional repeat count, field width, optional minimum
    number of digits (rendered as ``[repeat]I<width>[.<min>]``)."""

    @classmethod
    def from_number(cls, n, min=None):
        """Given an integer, returns a "reasonable" IntFormat instance to represent
        any number between 0 and n if n > 0, -n and n if n < 0

        Parameters
        ----------
        n : int
            max number one wants to be able to represent
        min : int
            minimum number of characters to use for the format

        Returns
        -------
        res : IntFormat
            IntFormat instance with reasonable (see Notes) computed width

        Notes
        -----
        Reasonable should be understood as the minimal string length necessary
        without losing precision. For example, IntFormat.from_number(1) will
        return an IntFormat instance of width 2, so that any 0 and 1 may be
        represented as 1-character strings without loss of information.
        """
        # number_digits relies on log10, which is undefined at zero; the
        # number 0 still needs one digit.
        digits = 1 if n == 0 else number_digits(n)
        # One extra character on top of the digits, plus one for the sign
        # when n is negative.
        width = digits + 1
        if n < 0:
            width += 1
        # As many fields as fit in 80 characters.
        repeat = 80 // width
        return cls(width, min, repeat=repeat)

    def __init__(self, width, min=None, repeat=None):
        """
        Parameters
        ----------
        width : int
            number of characters of one field
        min : int, optional
            minimum number of digits
        repeat : int, optional
            repeat count placed in front of the descriptor
        """
        self.width = width
        self.repeat = repeat
        self.min = min

    def __repr__(self):
        # fortran_format is already "(<spec>)"; only the class name is added.
        return "IntFormat" + self.fortran_format

    @property
    def fortran_format(self):
        """Fortran format string, e.g. ``(3I4)`` or ``(I4.2)``."""
        parts = ["("]
        if self.repeat:
            parts.append("%d" % self.repeat)
        parts.append("I%d" % self.width)
        if self.min:
            parts.append(".%d" % self.min)
        parts.append(")")
        return "".join(parts)

    @property
    def python_format(self):
        """Python %-format for one field; only the width is used."""
        return "%" + str(self.width) + "d"

93 

94 

class ExpFormat:
    """Exponential (floating point) format: optional repeat count, field
    width, number of significand digits and optional exponent digit count
    (rendered as ``[repeat]E<width>.<significand>[E<min>]``)."""

    @classmethod
    def from_number(cls, n, min=None):
        """Given a float number, returns a "reasonable" ExpFormat instance to
        represent any number between -n and n.

        Parameters
        ----------
        n : float
            max number one wants to be able to represent. Both NumPy scalars
            and plain Python numbers are accepted; values without a
            floating-point dtype are treated as double precision.
        min : int
            minimum number of characters to use for the format

        Returns
        -------
        res : ExpFormat
            ExpFormat instance with reasonable (see Notes) computed width

        Notes
        -----
        Reasonable should be understood as the minimal string length necessary
        to avoid losing precision.
        """
        # Plain Python floats carry no ``dtype`` attribute, so resolve the
        # dtype through NumPy instead of reading ``n.dtype`` directly.
        dtype = np.asarray(n).dtype
        if dtype.kind not in "fc":
            dtype = np.dtype(float)
        finfo = np.finfo(dtype)
        # Number of digits for fractional part
        n_prec = finfo.precision + 1
        # Number of digits for exponential part: decimal digit count of the
        # largest exponent magnitude reported by finfo (exact for integers).
        n_exp = len(str(int(max(abs(finfo.maxexp), abs(finfo.minexp)))))
        # len of one number in exp format: sign + 1|0 + "." +
        # number of digit for fractional part + 'E' + sign of exponent +
        # len of exponent
        width = 1 + 1 + n_prec + 1 + n_exp + 1
        if n < 0:
            width += 1
        # As many fields as fit in 80 characters.
        repeat = 80 // width
        return cls(width, n_prec, min, repeat=repeat)

    def __init__(self, width, significand, min=None, repeat=None):
        """\
        Parameters
        ----------
        width : int
            number of characters taken by the string (includes space).
        significand : int
            number of digits written after the decimal point
        min : int, optional
            number of exponent digits (the ``E<min>`` suffix)
        repeat : int, optional
            repeat count placed in front of the descriptor
        """
        self.width = width
        self.significand = significand
        self.repeat = repeat
        self.min = min

    def __repr__(self):
        # fortran_format is already "(<spec>)"; only the class name is added.
        return "ExpFormat" + self.fortran_format

    @property
    def fortran_format(self):
        """Fortran format string, e.g. ``(3E24.16)`` or ``(E12.5E3)``."""
        parts = ["("]
        if self.repeat:
            parts.append("%d" % self.repeat)
        parts.append("E%d.%d" % (self.width, self.significand))
        if self.min:
            parts.append("E%d" % self.min)
        parts.append(")")
        return "".join(parts)

    @property
    def python_format(self):
        """Python %-format for one field.

        The Python field is one character narrower than ``width``.
        """
        return "%" + str(self.width-1) + "." + str(self.significand) + "E"

166 

167 

class Token:
    """A lexical token: its type name, the matched text, and a position."""

    def __init__(self, type, value, pos):
        self.type, self.value, self.pos = type, value, pos

    def __str__(self):
        # The type is single-quoted and the value double-quoted; pos is omitted.
        return "Token('{}', \"{}\")".format(self.type, self.value)

    def __repr__(self):
        return str(self)

179 

180 

class Tokenizer:
    """Splits a string into Token instances using the TOKENS patterns."""

    def __init__(self):
        # Token type names and their compiled patterns, kept in parallel lists.
        self.tokens = list(TOKENS)
        self.res = [re.compile(TOKENS[name]) for name in self.tokens]

    def input(self, s):
        """Set the string to tokenize and rewind to its start."""
        self.data = s
        self.curpos = 0
        self.len = len(s)

    def next_token(self):
        """Return the next Token, or None once the input is exhausted.

        Raises
        ------
        SyntaxError
            If no pattern matches at the current position.
        """
        start = self.curpos
        if start >= self.len:
            return None

        # The first pattern that matches at the current position wins.
        for name, pattern in zip(self.tokens, self.res):
            found = pattern.match(self.data, start)
            if found is not None:
                self.curpos = found.end()
                # The token's pos is the offset just past the matched text.
                return Token(name, found.group(), self.curpos)

        raise SyntaxError("Unknown character at position %d (%s)"
                          % (self.curpos, self.data[start]))

204 

205 

206# Grammar for fortran format: 

207# format : LPAR format_string RPAR 

208# format_string : repeated | simple 

209# repeated : repeat simple 

210# simple : int_fmt | exp_fmt 

211# int_fmt : INT_ID width 

212# exp_fmt : simple_exp_fmt | extended_exp_fmt 

213# simple_exp_fmt : EXP_ID width DOT significand 

214# extended_exp_fmt : EXP_ID width DOT significand EXP_ID ndigits 

215# repeat : INT 

216# width : INT 

217# significand : INT 

218# ndigits : INT 

219 

220# Naive fortran formatter - parser is hand-made 

class FortranFormatParser:
    """Parser for Fortran format strings. The parse method returns a *Format
    instance.

    Notes
    -----
    Only ExpFormat (exponential format for floating values) and IntFormat
    (integer format) for now.
    """
    def __init__(self):
        self.tokenizer = Tokenizer()

    def parse(self, s):
        """Parse the Fortran format string ``s``.

        Parameters
        ----------
        s : str
            Format string such as ``'(3I4)'`` or ``'(3E24.16)'``.

        Returns
        -------
        fmt : IntFormat or ExpFormat

        Raises
        ------
        BadFortranFormat
            If ``s`` cannot be tokenized or does not follow the grammar.
        """
        self.tokenizer.input(s)

        try:
            # next_token returns None once the input is exhausted.
            tokens = list(iter(self.tokenizer.next_token, None))
            return self._parse_format(tokens)
        except SyntaxError as e:
            raise BadFortranFormat(str(e)) from e

    def _get_min(self, tokens):
        """Consume ``DOT INT`` from the front of ``tokens``; return the INT text."""
        self._next(tokens, "DOT")
        return self._next(tokens, "INT").value

    def _expect(self, token, tp):
        """Raise SyntaxError unless ``token`` has type ``tp``."""
        if token.type != tp:
            raise SyntaxError("Expected %s token (got '%s')"
                              % (tp, token.value))

    def _parse_format(self, tokens):
        """Build a *Format instance from a complete list of tokens.

        Every malformed input is reported as SyntaxError so that ``parse``
        can convert it to BadFortranFormat.
        """
        if not tokens:
            raise SyntaxError("Empty format string")
        if tokens[0].type != "LPAR":
            raise SyntaxError("Expected left parenthesis at position "
                              "%d (got '%s')" % (0, tokens[0].value))
        if tokens[-1].type != "RPAR":
            raise SyntaxError("Expected right parenthesis at position "
                              "%d (got '%s')" % (len(tokens), tokens[-1].value))

        # Work on a copy of the part between the parentheses; it is consumed
        # from the front as parsing proceeds.
        body = list(tokens[1:-1])
        if not body:
            raise SyntaxError("Empty format specification")

        # Optional leading repeat count.
        repeat = None
        if body[0].type == "INT":
            repeat = int(body.pop(0).value)
        if not body:
            raise SyntaxError("Missing format descriptor after repeat count")

        descriptor = body.pop(0)
        if descriptor.type == "INT_ID":
            width = int(self._next(body, "INT").value)
            # Optional ".m" minimum number of digits.
            min_digits = int(self._get_min(body)) if body else None
            result = IntFormat(width, min_digits, repeat)
        elif descriptor.type == "EXP_ID":
            width = int(self._next(body, "INT").value)
            self._next(body, "DOT")
            significand = int(self._next(body, "INT").value)
            # Optional "Ee" exponent digit count.
            exp_digits = None
            if body:
                self._next(body, "EXP_ID")
                exp_digits = int(self._next(body, "INT").value)
            result = ExpFormat(width, significand, exp_digits, repeat)
        else:
            raise SyntaxError("Invalid formater type %s" % descriptor.value)

        # Anything left over is not covered by the grammar.
        if body:
            raise SyntaxError("Unexpected trailing token '%s'" % body[0].value)
        return result

    def _next(self, tokens, tp):
        """Pop and return the first token, requiring it to have type ``tp``."""
        if not tokens:
            raise SyntaxError("Unexpected end of format (expected %s)" % tp)
        token = tokens.pop(0)
        self._expect(token, tp)
        return token