1"""A collection of functions designed to help I/O with ascii files.
2
3"""
4__docformat__ = "restructuredtext en"
5
6import numpy as np
7import numpy._core.numeric as nx
8from numpy._utils import asbytes, asunicode
9
10
11def _decode_line(line, encoding=None):
12 """Decode bytes from binary input streams.
13
14 Defaults to decoding from 'latin1'. That differs from the behavior of
15 np.compat.asunicode that decodes from 'ascii'.
16
17 Parameters
18 ----------
19 line : str or bytes
20 Line to be decoded.
21 encoding : str
22 Encoding used to decode `line`.
23
24 Returns
25 -------
26 decoded_line : str
27
28 """
29 if type(line) is bytes:
30 if encoding is None:
31 encoding = "latin1"
32 line = line.decode(encoding)
33
34 return line
35
36
37def _is_string_like(obj):
38 """
39 Check whether obj behaves like a string.
40 """
41 try:
42 obj + ''
43 except (TypeError, ValueError):
44 return False
45 return True
46
47
48def _is_bytes_like(obj):
49 """
50 Check whether obj behaves like a bytes object.
51 """
52 try:
53 obj + b''
54 except (TypeError, ValueError):
55 return False
56 return True
57
58
def has_nested_fields(ndtype):
    """
    Report whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields, False otherwise
        (including when `ndtype` has no fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> import numpy as np
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # ``names`` is None for unstructured dtypes; treat that as "no fields".
    field_names = ndtype.names or ()
    for field_name in field_names:
        if ndtype[field_name].names is not None:
            return True
    return False
82
83
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a structured data-type into a flat list of base dtypes.

    Nested fields are expanded recursively and, optionally, fields with a
    shape are expanded into one entry per element.

    Note that the field names are lost.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, transform a field with a shape into several fields. Default is
        False.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order.

    Examples
    --------
    >>> import numpy as np
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into each field and concatenate.
        collected = []
        for key in field_names:
            field_dtype = ndtype.fields[key][0]
            collected += flatten_dtype(field_dtype, flatten_base)
        return collected

    # Leaf dtype: one entry, or one per element of its shape when asked.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat
130
131
class LineSplitter:
    """
    Callable that breaks a line of text into fields.

    The line is cut either at a delimiter string or at fixed positions,
    after any trailing comment has been removed.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Encoding used to decode bytes lines passed to the instance.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every string it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            The result of wrapping `method`. `wrapped` takes a single input
            argument and returns a list of strings that are stripped of
            white-space.

        """
        def wrapped(text):
            return [piece.strip() for piece in method(text)]

        return wrapped

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        delimiter = _decode_line(delimiter)
        comments = _decode_line(comments)

        self.comments = comments

        if delimiter is None or isinstance(delimiter, str):
            # Delimiter is a character; an empty string means "whitespace"
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Delimiter is a list of field widths: precompute the slices
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for start, stop in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Delimiter is a single (non-zero) integer width
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter

        self._handyman = self.autostrip(splitter) if autostrip else splitter
        self.encoding = encoding

    def _drop_comment(self, line):
        """Return `line` up to the comment marker (whole line if none set)."""
        marker = self.comments
        if marker is None:
            return line
        return line.split(marker)[0]

    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        content = self._drop_comment(line).strip(" \r\n")
        if not content:
            return []
        return content.split(self.delimiter)

    def _fixedwidth_splitter(self, line):
        """Chop off comments and line endings, cut into equal-width fields."""
        content = self._drop_comment(line).strip("\r\n")
        if not content:
            return []
        width = self.delimiter
        return [content[start:start + width]
                for start in range(0, len(content), width)]

    def _variablewidth_splitter(self, line):
        """Chop off comments and cut at the precomputed slices."""
        content = self._drop_comment(line)
        if not content:
            return []
        return [content[piece] for piece in self.delimiter]

    def __call__(self, line):
        text = _decode_line(line, self.encoding)
        return self._handyman(text)
226
227
class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a list of valid names will be created. The `__call__`
    method accepts an optional keyword "default" that sets the default name
    in case of ambiguity. By default this is 'f', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default names
        ['return', 'file', 'print'] are always excluded as well. Excluded
        names are appended an underscore: for example, `file` becomes
        `file_` if supplied. The sequence passed in is copied, never
        modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote character is always deleted in addition.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        ``None`` (the default) behaves like True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Raises
    ------
    ValueError
        If `case_sensitive` is a string that starts with neither 'u' nor
        'l'.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> import numpy as np
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list. A new list is built so that the
        # caller's sequence is left untouched (it used to be extended in
        # place, growing on every instantiation).
        if excludelist is None:
            excludelist = []
        self.excludelist = [*excludelist, *self.defaultexcludelist]
        # Process the list of characters to delete. The class-level default
        # set is copied so that adding '"' below does not leak into it.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option .....
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-element sequence.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str or None
            The validated field names. ``None`` is returned when both
            `names` and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks ..............
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        if nbfields is not None:
            # Pad with empty names or truncate to reach `nbfields` entries
            nbnames = len(names)
            if nbnames < nbfields:
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts ...........
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables ...
        validatednames = []
        seen = {}
        nbempty = 0

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Nothing left: generate a default name that does not clash
                # with any of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
383
384
def str2bool(value):
    """
    Convert the text 'True' or 'False' (in any letter case) to a boolean.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> import numpy as np
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    normalized = value.upper()
    if normalized == 'TRUE':
        return True
    if normalized == 'FALSE':
        return False
    raise ValueError("Invalid boolean")
420
421
class ConverterError(Exception):
    """
    Error signalling that a converter for string values failed.

    """
428
429
class ConverterLockError(ConverterError):
    """
    Error signalling an attempt to upgrade a converter that is locked.

    """
436
437
class ConversionWarning(UserWarning):
    """
    Warning category used when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
449
450
class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``. A single string is split on commas.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    # Ordered list of (type, conversion function, default) entries. The
    # position of an entry is what `_status` refers to.
    _mapper = [(nx.bool, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.str_, asunicode, '???'),
                    (nx.bytes_, asbytes, '???'),
                    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first
            element is a tuple or list is taken to hold complete mapper
            entries, which are inserted as they are.
        default : any, optional
            Default value associated with `func`, or sequence of default
            values when `func` is a sequence of functions. When fewer
            defaults than functions are given, the missing ones are
            ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if callable(func):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            # Sequence of ready-made (type, function, default) entries
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default.
                # `extend` is required here: `append` would add the padding
                # as a single nested list, so that `zip` below would drop
                # the remaining functions.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, entry)`` for the mapper entry matching `dtype`.

        An entry whose type equals ``dtype.type`` is preferred; otherwise
        the first entry of which ``dtype.type`` is a sub-dtype is used.

        Raises
        ------
        LookupError
            If no entry matches.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not callable(dtype_or_func):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if a ValueError occurs."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`, accepting failure only for missing values.

        Returns the converted value, or the default when the conversion
        fails and the stripped `value` is one of the missing values.

        Raises
        ------
        ValueError
            If the conversion fails and `value` is not a missing value.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Move the converter to the next entry of the mapper.

        Raises
        ------
        ConverterLockError
            If the converter is locked.
        ConverterError
            If the current status equals the number of mapper entries.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        # NOTE(review): when `_status` is already the last index
        # (`_statusmax - 1`) neither branch changes it, so callers that
        # retry after `_do_upgrade` may retry with the same entry
        # indefinitely -- confirm whether this case should raise.
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default supplied at construction wins over the mapper's default
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until it can convert every item of `value`.

        Parameters
        ----------
        value : str or iterable of str
            The string(s) to test. An object without ``__iter__`` is
            wrapped in a one-element tuple.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. A single non-empty
            string is split on commas, as in the constructor. The default
            is ``''``, which adds nothing.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Raises
        ------
        TypeError
            If one of the `missing_values` is not a string.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if isinstance(missing_values, str):
                # A plain string is iterable, so passing it to `set.update`
                # unchanged would register each of its characters as a
                # separate missing value. Split it on commas instead, like
                # the constructor does; the empty default adds nothing.
                if missing_values:
                    missing_values = missing_values.split(",")
                else:
                    missing_values = []
            elif not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)
821
822
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` object from a loose definition and optional names.

    The input definition is turned into a dtype and its field names are
    matched with, and validated against, the given names.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type.

    Examples
    --------
    >>> import numpy as np
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # Not directly understood by np.dtype: treat the input as a
        # sequence of formats, one per field.
        validator = NameValidator(**validationargs)
        field_count = len(ndtype)
        if names is None:
            names = [''] * field_count
        elif isinstance(names, str):
            names = names.split(",")
        names = validator(names, nbfields=field_count, defaultfmt=defaultfmt)
        return np.dtype({'formats': ndtype, 'names': names})

    if names is None:
        # No names requested and no fields: nothing to validate
        if ndtype.names is None:
            return ndtype
        validator = NameValidator(**validationargs)
        existing = ndtype.names
        # Names that look auto-generated are regenerated with `defaultfmt`
        # when a non-default format was requested.
        autonames = tuple("f%i" % i for i in range(len(existing)))
        if existing == autonames and defaultfmt != "f%i":
            ndtype.names = validator([''] * len(existing),
                                     defaultfmt=defaultfmt)
        else:
            # Explicit initial names: just validate
            ndtype.names = validator(existing, defaultfmt=defaultfmt)
        return ndtype

    # Explicit names were given
    validator = NameValidator(**validationargs)
    if isinstance(names, str):
        names = names.split(",")
    if ndtype.names is not None:
        # Structured dtype: just validate the names as needed
        ndtype.names = validator(names, nbfields=len(ndtype.names),
                                 defaultfmt=defaultfmt)
        return ndtype
    # Simple dtype: repeat it to match the number of names
    formats = tuple([ndtype.type] * len(names))
    names = validator(names, defaultfmt=defaultfmt)
    return np.dtype(list(zip(names, formats)))