Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/lib/_iotools.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

349 statements  

1"""A collection of functions designed to help I/O with ascii files. 

2 

3""" 

4__docformat__ = "restructuredtext en" 

5 

6import numpy as np 

7import numpy._core.numeric as nx 

8from numpy._utils import asbytes, asunicode 

9 

10 

11def _decode_line(line, encoding=None): 

12 """Decode bytes from binary input streams. 

13 

14 Defaults to decoding from 'latin1'. That differs from the behavior of 

15 np.compat.asunicode that decodes from 'ascii'. 

16 

17 Parameters 

18 ---------- 

19 line : str or bytes 

20 Line to be decoded. 

21 encoding : str 

22 Encoding used to decode `line`. 

23 

24 Returns 

25 ------- 

26 decoded_line : str 

27 

28 """ 

29 if type(line) is bytes: 

30 if encoding is None: 

31 encoding = "latin1" 

32 line = line.decode(encoding) 

33 

34 return line 

35 

36 

37def _is_string_like(obj): 

38 """ 

39 Check whether obj behaves like a string. 

40 """ 

41 try: 

42 obj + '' 

43 except (TypeError, ValueError): 

44 return False 

45 return True 

46 

47 

48def _is_bytes_like(obj): 

49 """ 

50 Check whether obj behaves like a bytes object. 

51 """ 

52 try: 

53 obj + b'' 

54 except (TypeError, ValueError): 

55 return False 

56 return True 

57 

58 

def has_nested_fields(ndtype):
    """
    Report whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has field names of its own. A dtype
        whose ``names`` is None yields False.

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> import numpy as np
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # Unstructured dtypes report ``names`` as None: nothing to inspect.
    field_names = ndtype.names or ()
    for key in field_names:
        if ndtype[key].names is not None:
            return True
    return False

82 

83 

def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured dtype into a flat list of
    base dtypes.

    Field names are not kept in the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, a field with a shape is expanded into one entry per
        element. Default is False.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order.

    Examples
    --------
    >>> import numpy as np
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field and concatenate.
        collected = []
        for key in field_names:
            subdtype = ndtype.fields[key][0]
            collected += flatten_dtype(subdtype, flatten_base)
        return collected
    # Leaf dtype: one entry, or one entry per element when expanding shapes.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat

130 

131 

class LineSplitter:
    """
    Callable that cuts a line of text into fields, either at a delimiter
    or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Encoding handed to `_decode_line` for each line passed to the
        instance. Default is None.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every string it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function taking a single argument and returning the list of
            the strings produced by `method`, each stripped of white-space.

        """
        def wrapped(text):
            return [piece.strip() for piece in method(text)]

        return wrapped

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        delimiter = _decode_line(delimiter)
        comments = _decode_line(comments)

        self.comments = comments

        if delimiter is None or isinstance(delimiter, str):
            # Character delimiter; an empty string is treated like None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths, turned into consecutive slices.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(lo, hi)
                         for (lo, hi) in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero integer: every field has that width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter
        self.encoding = encoding

    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip(" \r\n")
        return line.split(self.delimiter) if line else []

    def _fixedwidth_splitter(self, line):
        """Chop off comments and line endings, then cut equal-width fields."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip("\r\n")
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Chop off comments, then cut the line at the precomputed slices."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        if not line:
            return []
        return [line[piece] for piece in self.delimiter]

    def __call__(self, line):
        decoded = _decode_line(line, self.encoding)
        return self._handyman(decoded)

226 

227 

class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created. The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the default
    name format in case of ambiguity. By default this is 'f%i', so that
    names will default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return', 'file', 'print']. Excluded names are appended an
        underscore: for example, `file` becomes `file_` if supplied.
        The sequence passed in is copied, never modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> import numpy as np
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list. A new list is built so that neither
        # the caller's sequence nor the class-level default is modified.
        user_excluded = [] if excludelist is None else list(excludelist)
        self.excludelist = user_excluded + list(self.defaultexcludelist)
        # Process the list of characters to delete. The class-level default
        # is copied because the double quote is added to the set below.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str or None
            The validated field names. ``None`` is returned when both
            `names` and `nbfields` are ``None``.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        if nbfields is not None:
            # Pad with empty names or truncate to reach `nbfields` entries.
            nbnames = len(names)
            if nbnames < nbfields:
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables
        validatednames = []
        seen = {}
        nbempty = 0

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Generate a default name, skipping any that would clash
                # with one of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Repeated names get a numeric suffix: 'a', 'a_1', 'a_2', ...
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)

383 

384 

def str2bool(value):
    """
    Interpret a string spelling 'true' or 'false' as a boolean.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> import numpy as np
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    # Comparison is done on the upper-cased text, so any casing is accepted.
    normalized = value.upper()
    if normalized == 'TRUE':
        return True
    if normalized != 'FALSE':
        raise ValueError("Invalid boolean")
    return False

420 

421 

class ConverterError(Exception):
    """
    Error signalling that a converter for string values has failed.

    """

428 

429 

class ConverterLockError(ConverterError):
    """
    Error signalling an attempt to upgrade a converter that is locked.

    """

436 

437 

class ConversionWarning(UserWarning):
    """
    Warning category used when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """

449 

450 

class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    _mapper = [(nx.bool, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.str_, asunicode, '???'),
                    (nx.bytes_, asbytes, '???'),
                    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first
            item is a tuple or list is taken as ready-made mapper entries
            and inserted as given.
        default : any or sequence, optional
            Default value associated with `func`, or one default per
            function when `func` is a sequence of functions. A sequence
            shorter than `func` is padded with ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if callable(func):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            # Sequence of complete mapper entries: insert them unchanged.
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per missing default. `extend` is
                # required here: `append` would add a single nested list,
                # so `zip` below would drop the trailing functions.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, (type, func, default))`` for the mapper entry
        matching `dtype`; raise LookupError when there is none.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not callable(dtype_or_func):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default on ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; on ValueError return the default if `value` is a
        registered missing value, otherwise raise ValueError.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """Move to the next mapper entry, unless the converter is locked."""
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default supplied at construction wins over the mapper's one.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` converts.

        A non-iterable `value` is treated as a single item. Nothing is
        returned; only the state of the converter is updated.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. The default is ``''``.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            # No default given: infer the output type from a trial call.
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)

821 

822 

def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a loose description and optional field names.

    The input `ndtype` is converted to a dtype and paired with the given
    names, which are passed through a `NameValidator`.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting dtype.

    Examples
    --------
    >>> import numpy as np
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # `np.dtype` rejected the input: treat it as a sequence of formats
        # and name one field per format.
        validate = NameValidator(**validationargs)
        nbfields = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, str):
            names = names.split(",")
        names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
        return np.dtype({'formats': ndtype, 'names': names})

    if names is None:
        # No names requested and no fields present: nothing to validate.
        if ndtype.names is None:
            return ndtype
        validate = NameValidator(**validationargs)
        count = len(ndtype.names)
        auto_names = tuple("f%i" % idx for idx in range(count))
        if ndtype.names == auto_names and defaultfmt != "f%i":
            # Fields carry the default 'f0', 'f1', ... names: regenerate
            # them with the requested format.
            ndtype.names = validate([''] * count, defaultfmt=defaultfmt)
        else:
            # Explicit initial names: just validate them.
            ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt)
        return ndtype

    # Explicit names were given.
    validate = NameValidator(**validationargs)
    if isinstance(names, str):
        names = names.split(",")
    if ndtype.names is not None:
        # Structured dtype: validate the names against the field count.
        ndtype.names = validate(names, nbfields=len(ndtype.names),
                                defaultfmt=defaultfmt)
        return ndtype
    # Simple dtype: repeat its type once per name.
    formats = (ndtype.type,) * len(names)
    names = validate(names, defaultfmt=defaultfmt)
    return np.dtype(list(zip(names, formats)))