Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/numpy/lib/_iotools.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

351 statements  

1"""A collection of functions designed to help I/O with ascii files. 

2 

3""" 

4__docformat__ = "restructuredtext en" 

5 

6import itertools 

7 

8import numpy as np 

9import numpy._core.numeric as nx 

10from numpy._utils import asbytes, asunicode 

11 

12 

13def _decode_line(line, encoding=None): 

14 """Decode bytes from binary input streams. 

15 

16 Defaults to decoding from 'latin1'. 

17 

18 Parameters 

19 ---------- 

20 line : str or bytes 

21 Line to be decoded. 

22 encoding : str 

23 Encoding used to decode `line`. 

24 

25 Returns 

26 ------- 

27 decoded_line : str 

28 

29 """ 

30 if type(line) is bytes: 

31 if encoding is None: 

32 encoding = "latin1" 

33 line = line.decode(encoding) 

34 

35 return line 

36 

37 

38def _is_string_like(obj): 

39 """ 

40 Check whether obj behaves like a string. 

41 """ 

42 try: 

43 obj + '' 

44 except (TypeError, ValueError): 

45 return False 

46 return True 

47 

48 

49def _is_bytes_like(obj): 

50 """ 

51 Check whether obj behaves like a bytes object. 

52 """ 

53 try: 

54 obj + b'' 

55 except (TypeError, ValueError): 

56 return False 

57 return True 

58 

59 

def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields. False otherwise,
        including when `ndtype.names` is None.

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> import numpy as np
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # `names` is None for unstructured dtypes; treat that as "no fields".
    for field_name in ndtype.names or ():
        if ndtype[field_name].names is not None:
            return True
    return False

83 

84 

def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured data-type into a flat list of
    base dtypes.

    Field names are not kept in the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, a field with a shape contributes one entry per element of
        that shape instead of a single entry. Default is False.

    Returns
    -------
    types : list of dtype
        Base dtypes of the leaf fields, in field order.

    Examples
    --------
    >>> import numpy as np
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field and concatenate.
        flattened = []
        for field_name in field_names:
            field_dtype = ndtype.fields[field_name][0]
            flattened.extend(flatten_dtype(field_dtype, flatten_base))
        return flattened

    # Plain (possibly sub-array) dtype: one entry, or one per element.
    if not flatten_base:
        return [ndtype.base]
    element_count = int(np.prod(ndtype.shape))
    return [ndtype.base] * element_count

131 

132 

class LineSplitter:
    """
    Callable that cuts a line of text into fields, either at a delimiter
    or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Encoding handed to `_decode_line` for every line passed to the
        instance. Default is None.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every element of its output is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            The result of wrapping `method`. `wrapped` takes a single input
            argument and returns a list of strings that are stripped of
            white-space.

        """
        def wrapped(line):
            return [piece.strip() for piece in method(line)]

        return wrapped

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        delimiter = _decode_line(delimiter)
        comments = _decode_line(comments)

        self.comments = comments

        if delimiter is None or isinstance(delimiter, str):
            # Character delimiter; an empty string is normalised to None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths: precompute one slice per field.
            splitter = self._variablewidth_splitter
            edges = np.cumsum([0] + list(delimiter))
            delimiter = [slice(lo, hi)
                         for lo, hi in itertools.pairwise(edges)]
        elif int(delimiter):
            # Single non-zero integer: constant field width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            # Integer-like zero: fall back to whitespace splitting.
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter

        # Inside this method `autostrip` is the boolean argument, while
        # `self.autostrip` is the wrapping method defined above.
        self._handyman = self.autostrip(splitter) if autostrip else splitter
        self.encoding = encoding

    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        line = line.strip(" \r\n")
        return line.split(self.delimiter) if line else []

    def _fixedwidth_splitter(self, line):
        """Chop off comments and line endings, then cut equal-width fields."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        line = line.strip("\r\n")
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Chop off comments, then cut the line at the precomputed slices."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        if not line:
            return []
        return [line[piece] for piece in self.delimiter]

    def __call__(self, line):
        decoded = _decode_line(line, self.encoding)
        return self._handyman(decoded)

227 

228 

class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a list of valid names will be created.  The `__call__`
    method accepts an optional keyword "default" that sets the default name
    in case of ambiguity. By default this is 'f', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return', 'file', 'print']. Excluded names are appended an
        underscore: for example, `file` becomes `file_` if supplied.
        The sequence is copied; the caller's object is never modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> import numpy as np
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    defaultexcludelist = 'return', 'file', 'print'
    defaultdeletechars = frozenset(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list.  Build a fresh list rather than
        # extending the argument in place: the caller's sequence must not
        # grow as a side effect, and tuples (no `.extend`) must be accepted.
        user_excluded = [] if excludelist is None else list(excludelist)
        self.excludelist = user_excluded + list(self.defaultexcludelist)
        # Process the list of characters to delete
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        # Double quotes are always removed, whatever the user supplied.
        delete.add('"')
        self.deletechars = delete
        # Process the case option (None is treated like True).
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = f'unrecognized case_sensitive value {case_sensitive}.'
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        if nbfields is not None:
            nbnames = len(names)
            if nbnames < nbfields:
                # Pad with empty names; they get a default name below.
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initialize some variables
        validatednames = []
        seen = {}      # cleaned name -> number of times already emitted
        nbempty = 0    # next index to try for a default name

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Nothing left: pick the first default name that does not
                # collide with one of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)

384 

385 

def str2bool(value):
    """
    Convert the text 'True' or 'False' (in any letter case) to a boolean.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> import numpy as np
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    recognised = {'TRUE': True, 'FALSE': False}
    key = value.upper()
    if key not in recognised:
        raise ValueError("Invalid boolean")
    return recognised[key]

421 

422 

class ConverterError(Exception):
    """
    Error signalling a failure inside a converter for string values.

    """

429 

430 

class ConverterLockError(ConverterError):
    """
    Error signalling an attempt to upgrade a converter that is locked.

    """

437 

438 

class ConversionWarning(UserWarning):
    """
    Warning emitted when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """

450 

451 

class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    _mapper = [(nx.bool, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.str_, asunicode, '???'),
                    (nx.bytes_, asbytes, '???'),
                    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first
            element is a tuple or list is taken to hold ready-made mapper
            entries, which are inserted unchanged.
        default : any or sequence, optional
            Default value associated with `func`, or one default per
            function when `func` is a sequence. A sequence shorter than
            `func` is padded with ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if callable(func):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            # Sequence of complete (type, function, default) entries
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default.  This
                # must be `extend`: `append` would add a single nested list,
                # giving the next function a list as its default and letting
                # `zip` below silently drop the remaining functions.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, (type, function, default))`` for the mapper entry
        matching `dtype`; raise `LookupError` when there is none.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not callable(dtype_or_func):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        # Remember the user-supplied (or guessed) default so that upgrades
        # keep it instead of switching to the mapper's default.
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default on `ValueError`."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; on `ValueError` return the default for a missing
        value and raise `ValueError` for anything else.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError(f"Cannot convert string '{value}'")

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Move to the next mapper entry, unless the converter is locked.

        Raises
        ------
        ConverterLockError
            If the converter is locked.
        ConverterError
            If the current status equals the length of the mapper.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every string in `value` converts.

        A non-iterable `value` is treated as a single item. Nothing is
        returned; only the state of the converter is changed.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. The default is ``''``.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            # No default given: infer the output type from a trial call.
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)

822 

823 

def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a flexible dtype description and optional names.

    The function processes the input `dtype` and matches it with the given
    names.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Examples
    --------
    >>> import numpy as np
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # `np.dtype` rejected the input: treat it as a sequence of formats
        # and pair each format with a validated name.
        checker = NameValidator(**validationargs)
        field_count = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, str):
            names = names.split(",")
        names = checker(names, nbfields=field_count, defaultfmt=defaultfmt)
        return np.dtype({"formats": ndtype, "names": names})

    if names is not None:
        # Explicit names
        checker = NameValidator(**validationargs)
        if isinstance(names, str):
            names = names.split(",")
        if ndtype.names is None:
            # Simple dtype: repeat to match the nb of names
            formats = tuple([ndtype.type] * len(names))
            names = checker(names, defaultfmt=defaultfmt)
            return np.dtype(list(zip(names, formats)))
        # Structured dtype: just validate the names as needed
        ndtype.names = checker(names, nbfields=len(ndtype.names),
                               defaultfmt=defaultfmt)
        return ndtype

    if ndtype.names is None:
        # Plain dtype and no names requested: nothing to adjust.
        return ndtype

    # Structured dtype without explicit names
    checker = NameValidator(**validationargs)
    auto_names = tuple(f"f{i}" for i in range(len(ndtype.names)))
    if (ndtype.names == auto_names) and (defaultfmt != "f%i"):
        # Default initial names and a custom format: regenerate the names.
        ndtype.names = checker([''] * len(ndtype.names),
                               defaultfmt=defaultfmt)
    else:
        # Explicit initial names : just validate
        ndtype.names = checker(ndtype.names, defaultfmt=defaultfmt)
    return ndtype