Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/numpy/lib/_iotools.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

352 statements  

1"""A collection of functions designed to help I/O with ascii files. 

2 

3""" 

4__docformat__ = "restructuredtext en" 

5 

6import numpy as np 

7import numpy.core.numeric as nx 

8from numpy.compat import asbytes, asunicode 

9 

10 

11def _decode_line(line, encoding=None): 

12 """Decode bytes from binary input streams. 

13 

14 Defaults to decoding from 'latin1'. That differs from the behavior of 

15 np.compat.asunicode that decodes from 'ascii'. 

16 

17 Parameters 

18 ---------- 

19 line : str or bytes 

20 Line to be decoded. 

21 encoding : str 

22 Encoding used to decode `line`. 

23 

24 Returns 

25 ------- 

26 decoded_line : str 

27 

28 """ 

29 if type(line) is bytes: 

30 if encoding is None: 

31 encoding = "latin1" 

32 line = line.decode(encoding) 

33 

34 return line 

35 

36 

37def _is_string_like(obj): 

38 """ 

39 Check whether obj behaves like a string. 

40 """ 

41 try: 

42 obj + '' 

43 except (TypeError, ValueError): 

44 return False 

45 return True 

46 

47 

48def _is_bytes_like(obj): 

49 """ 

50 Check whether obj behaves like a bytes object. 

51 """ 

52 try: 

53 obj + b'' 

54 except (TypeError, ValueError): 

55 return False 

56 return True 

57 

58 

def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has a non-None ``names`` attribute,
        False otherwise (including when `ndtype` has no fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # An unstructured dtype has ``names`` set to None: nothing to inspect.
    field_names = ndtype.names or ()
    return any(ndtype[key].names is not None for key in field_names)

84 

85 

def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a structured data-type into a flat list of base data-types.

    Nested fields are expanded recursively, and fields with a shape are
    optionally expanded into one entry per element. The field names are
    not kept.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, transform a field with a shape into several fields. Default is
        False.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into each field, in declaration order.
        collected = []
        for key in field_names:
            sub_dtype = ndtype.fields[key][0]
            collected += flatten_dtype(sub_dtype, flatten_base)
        return collected

    # Leaf dtype: repeat the base once per element only when requested.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat

131 

132 

class LineSplitter:
    """
    Callable that splits a line at a delimiter or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Encoding handed to `_decode_line` when an instance is called on a
        line. Default is None.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every item it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            The result of wrapping `method`. `wrapped` takes a single input
            argument and returns a list of strings that are stripped of
            white-space.

        """
        def wrapped(text):
            return [piece.strip() for piece in method(text)]

        return wrapped

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        delimiter = _decode_line(delimiter)
        self.comments = _decode_line(comments)

        if delimiter is None or isinstance(delimiter, str):
            # Character delimiter; an empty string is normalized to None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths, converted to consecutive slices.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for (start, stop) in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero integer: every field has the same width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            # A zero width falls back to the plain delimited splitter.
            splitter = self._delimited_splitter
            delimiter = None

        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter
        self.encoding = encoding

    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip(" \r\n")
        return line.split(self.delimiter) if line else []

    def _fixedwidth_splitter(self, line):
        """Chop off comments and line endings, then cut equal-width fields."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip("\r\n")
        if not line:
            return []
        width = self.delimiter
        return [line[slice(start, start + width)]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Chop off comments, then cut the line at the precomputed slices."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        if not line:
            return []
        return [line[field] for field in self.delimiter]

    def __call__(self, line):
        decoded = _decode_line(line, self.encoding)
        return self._handyman(decoded)

227 

228 

class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a list of valid names will be created. The `__call__`
    method accepts an optional keyword "default" that sets the default name
    in case of ambiguity. By default this is 'f', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return', 'file', 'print']. Excluded names are appended an
        underscore: for example, `file` becomes `file_` if supplied.
        The sequence passed in is copied, never modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote character is always deleted as well.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    The case conversion is applied before the exclusion list is consulted,
    so an excluded name only matches if it is spelled in the converted case
    (see the second example below).

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list ..
        # Build a new list so that neither the caller's sequence nor the
        # class-level default list is modified; this also accepts any
        # sequence (e.g. a tuple), not only a list.
        if excludelist is None:
            excludelist = []
        self.excludelist = list(excludelist) + self.defaultexcludelist
        # Process the list of characters to delete
        # The class-level default set is copied before '"' is added so that
        # the shared default is left untouched.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option .....
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-element list.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. ``None`` is returned when both
            `names` and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks ..............
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        if nbfields is not None:
            nbnames = len(names)
            if nbnames < nbfields:
                # Pad with empty names; they get a default name below.
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts ...........
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables ...
        validatednames = []
        seen = dict()
        nbempty = 0

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Pick the next default name that does not collide with one
                # of the names that were passed in.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Duplicates get a numeric suffix: 'a', 'a_1', 'a_2', ...
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)

383 

384 

def str2bool(value):
    """
    Convert the textual form of a boolean into a ``bool``.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    # The comparison is done on the upper-cased text, hence case-insensitive.
    normalized = value.upper()
    if normalized == 'TRUE':
        return True
    if normalized == 'FALSE':
        return False
    raise ValueError("Invalid boolean")

419 

420 

class ConverterError(Exception):
    """
    Error raised when a converter for string values runs into a problem.

    """

427 

428 

class ConverterLockError(ConverterError):
    """
    Error raised when an upgrade is attempted on a locked converter.

    """

435 

436 

class ConversionWarning(UserWarning):
    """
    Warning emitted when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """

448 

449 

class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``. A single string is split at commas.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    _mapper = [(nx.bool_, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.unicode_, asunicode, '???'),
                    (nx.string_, asbytes, '???'),
                    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first item
            is a tuple or list is taken as ready-made mapper entries, which
            are inserted as they are.
        default : any or sequence, optional
            Default value associated with `func`, or one default per
            function when `func` is a sequence. Functions without a
            matching default get ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default. This
                # must be `extend`: `append` would add the padding as a
                # single nested list, which would then be used as the
                # default of the next function.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """Return ``(index, entry)`` of the mapper entry matching `dtype`.

        An exact match on ``dtype.type`` is preferred; otherwise the first
        entry of which ``dtype.type`` is a sub-dtype is used. LookupError
        is raised when nothing matches.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if a ValueError occurs."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """Convert `value`; only missing values fall back to the default.

        Raises ValueError when the conversion fails and the stripped
        `value` is not one of the registered missing values.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """Move to the next mapper entry, unless locked or exhausted."""
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default supplied at construction wins over the mapper's default.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """Upgrade the converter until every item of `value` converts.

        `value` may be a single item or an iterable of items; nothing is
        returned.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, str, None}, optional
            Sequence of strings indicating a missing value. A single string
            is split at commas, as in the constructor. If ``None``, then
            the existing `missing_values` are cleared. The default is `''`,
            which leaves the existing set unchanged.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Raises
        ------
        TypeError
            If an item of `missing_values` is not a string.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if isinstance(missing_values, str):
                # A bare string is one comma-separated specification, as in
                # the constructor. Handing it to set.update() directly would
                # register each of its characters as a missing value. The
                # empty string (the default) remains a no-op.
                if missing_values:
                    missing_values = missing_values.split(",")
                else:
                    missing_values = []
            elif not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)

820 

821 

def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a flexible description and optional names.

    The input `ndtype` is converted to a dtype and its field names are
    matched with (and validated against) the given `names`.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # `np.dtype` rejected the input: treat it as a sequence of formats
        # and pair each one with a validated name.
        validate = NameValidator(**validationargs)
        nbfields = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, str):
            names = names.split(",")
        names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
        return np.dtype({'formats': ndtype, 'names': names})

    # From here on `ndtype` is a genuine dtype instance.
    if names is not None:
        # Explicit names
        validate = NameValidator(**validationargs)
        if isinstance(names, str):
            names = names.split(",")
        if ndtype.names is None:
            # Simple dtype: repeat to match the nb of names
            formats = tuple([ndtype.type] * len(names))
            names = validate(names, defaultfmt=defaultfmt)
            return np.dtype(list(zip(names, formats)))
        # Structured dtype: just validate the names as needed
        # NOTE(review): the names are assigned on the dtype object itself;
        # whether that object can be the caller's own instance depends on
        # np.dtype() - confirm.
        ndtype.names = validate(names, nbfields=len(ndtype.names),
                                defaultfmt=defaultfmt)
        return ndtype

    if ndtype.names is None:
        # Neither explicit nor implicit names: nothing to validate.
        return ndtype

    # No explicit names, but the dtype carries its own.
    validate = NameValidator(**validationargs)
    current = ndtype.names
    # Default initial names : should we change the format ?
    numbered_names = tuple("f%i" % i for i in range(len(current)))
    if current == numbered_names and defaultfmt != "f%i":
        ndtype.names = validate([''] * len(current), defaultfmt=defaultfmt)
    else:
        # Explicit initial names : just validate
        ndtype.names = validate(current, defaultfmt=defaultfmt)
    return ndtype