1"""A collection of functions designed to help I/O with ascii files.
2
3"""
4__docformat__ = "restructuredtext en"
5
6import numpy as np
7import numpy.core.numeric as nx
8from numpy.compat import asbytes, asunicode
9
10
def _decode_line(line, encoding=None):
    """Turn a ``bytes`` line read from a binary stream into text.

    When no encoding is supplied, 'latin1' is used. This is different from
    np.compat.asunicode, which decodes from 'ascii'.

    Parameters
    ----------
    line : str or bytes
        Line to be decoded.
    encoding : str, optional
        Encoding used to decode `line`. ``None`` selects 'latin1'.

    Returns
    -------
    decoded_line : str
        The decoded text. Anything whose type is not exactly ``bytes`` is
        handed back unchanged.

    """
    # Exact type check on purpose: only plain ``bytes`` objects are decoded.
    if type(line) is not bytes:
        return line
    codec = "latin1" if encoding is None else encoding
    return line.decode(codec)
35
36
def _is_string_like(obj):
    """
    Tell whether `obj` can be concatenated with an empty ``str``.

    Returns False when the concatenation raises TypeError or ValueError,
    True otherwise.
    """
    try:
        obj + ''
    except (TypeError, ValueError):
        return False
    else:
        return True
46
47
def _is_bytes_like(obj):
    """
    Tell whether `obj` can be concatenated with an empty ``bytes`` object.

    Returns False when the concatenation raises TypeError or ValueError,
    True otherwise.
    """
    try:
        obj + b''
    except (TypeError, ValueError):
        return False
    else:
        return True
57
58
def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has field names of its own. A dtype
        without fields gives False.

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # ``names`` is None for unstructured dtypes: treat that as "no fields".
    field_names = ndtype.names or ()
    return any(ndtype[name].names is not None for name in field_names)
84
85
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured data-type into a flat list of
    the base data-types of its fields.

    Note that the field names are lost.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, a field with a shape is expanded into one entry per
        element. Default is False.

    Returns
    -------
    types : list of dtype
        Base dtypes of the fields, in field order, nested fields expanded
        in place.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: flatten every field recursively, in order.
        flattened = []
        for name in field_names:
            field_dtype = ndtype.fields[name][0]
            flattened += flatten_dtype(field_dtype, flatten_base)
        return flattened
    # Plain (possibly sub-array) dtype: one entry, or one per element.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat
131
132
class LineSplitter:
    """
    Callable that splits a line at a delimiter or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Encoding used to decode lines given as bytes when the instance is
        called.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every item it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            The result of wrapping `method`. `wrapped` takes a single input
            argument and returns a list of strings that are stripped of
            white-space.

        """
        def wrapped(line):
            return [field.strip() for field in method(line)]

        return wrapped

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        delimiter = _decode_line(delimiter)
        self.comments = _decode_line(comments)
        self.encoding = encoding

        if delimiter is None or isinstance(delimiter, str):
            # Character delimiter; an empty string is handled like None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths: precompute one slice per field.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for start, stop in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero integer: every field has the same width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            # Zero-like value: fall back to the delimited splitter with no
            # explicit delimiter.
            splitter = self._delimited_splitter
            delimiter = None

        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter

    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip(" \r\n")
        return line.split(self.delimiter) if line else []

    def _fixedwidth_splitter(self, line):
        """Chop off comments and line endings, cut into equal-width pieces."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip("\r\n")
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Chop off comments and cut the line with the precomputed slices."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        return [line[piece] for piece in self.delimiter] if line else []

    def __call__(self, line):
        decoded = _decode_line(line, self.encoding)
        return self._handyman(decoded)
227
228
class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    Each name is case-converted and stripped, its spaces are replaced by
    `replace_space`, and every character listed in `deletechars` is
    removed. During instantiation, the user can define a list of names to
    exclude, as well as a list of invalid characters. Names in the
    exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created. The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the format of
    the names generated for empty entries. By default this is 'f%i', so
    that names will default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default names
        ['return', 'file', 'print'] are always excluded as well. Excluded
        names are appended an underscore: for example, `file` becomes
        `file_` if supplied. The sequence is copied; the caller's object is
        left untouched.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote character is always deleted.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list. Build a new list instead of extending
        # the argument in place, so the caller's sequence is not modified
        # and non-list sequences (e.g. tuples) are accepted.
        if excludelist is None:
            excludelist = []
        self.excludelist = list(excludelist) + list(self.defaultexcludelist)
        # Process the list of characters to delete. Always work on a copy so
        # that adding '"' does not alter the shared class-level default set.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option .....
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-element list.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str or None
            The validated field names. ``None`` is returned when both
            `names` and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks ..............
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        if nbfields is not None:
            # Pad with empty names or truncate to get exactly nbfields names
            nbnames = len(names)
            if nbnames < nbfields:
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts ...........
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables ...
        validatednames = []
        seen = {}
        nbempty = 0

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Nothing left: generate a default name that does not
                # collide with any of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
383
384
def str2bool(value):
    """
    Convert the strings 'True' and 'False' (in any letter case) to the
    corresponding boolean.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    normalized = value.upper()
    if normalized == 'TRUE':
        return True
    if normalized == 'FALSE':
        return False
    raise ValueError("Invalid boolean")
419
420
class ConverterError(Exception):
    """
    Error signalled by a converter for string values when something goes
    wrong.

    """
427
428
class ConverterLockError(ConverterError):
    """
    Error signalled when something tries to upgrade a converter that is
    locked.

    """
435
436
class ConversionWarning(UserWarning):
    """
    Warning emitted when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
448
449
class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    _mapper = [(nx.bool_, str2bool, False),
               (nx.int_, int, -1),]

    # When the default integer is narrower than 64 bits (e.g. on 32-bit
    # systems, where nx.int_ is nx.int32), explicitly include nx.int64.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.unicode_, asunicode, '???'),
                    (nx.string_, asbytes, '???'),
                    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first item
            is a tuple or list is taken as a sequence of ready-made mapper
            entries, which are inserted as they are.
        default : any or sequence, optional
            Default value associated with `func`, or sequence of default
            values when `func` is a sequence of functions. A sequence
            shorter than `func` is padded with ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single functions
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per missing default. `extend` is needed
                # here: `append` would add a single nested list, so zip()
                # below would drop functions and use a list as a default.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, (type, function, default))`` for the first entry of
        the mapper matching `dtype`.

        An entry whose type is equal to ``dtype.type`` is preferred;
        otherwise the first entry of which ``dtype.type`` is a sub-dtype is
        used. LookupError is raised when no entry matches.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if `func` raises ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; only registered missing values fall back to the
        default, any other unconvertible string raises ValueError.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Switch to the next entry of the mapper.

        Raises ConverterLockError if the converter is locked and
        ConverterError if the status equals the length of the mapper. When
        the status already designates the last entry, it is left unchanged
        and that entry is loaded again.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default given at construction time wins over the mapper's one
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` is accepted by
        the strict conversion.

        `value` may be a single item or an iterable of items. Nothing is
        returned; only the state of the converter is changed.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. The default is `''`.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Raises
        ------
        TypeError
            If an item of `missing_values` is not a string.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            # No default given: deduce the output type from a trial
            # conversion, keeping the current default.
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)
820
821
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` object from a dtype definition and optional field
    names.

    The function processes the input `dtype` and matches it with the given
    names.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        dtype = np.dtype(ndtype)
    except TypeError:
        # Not directly understood by np.dtype: treat the input as a sequence
        # of formats and pair it with validated names.
        validate = NameValidator(**validationargs)
        nbfields = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, str):
            names = names.split(",")
        names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
        return np.dtype(dict(formats=ndtype, names=names))

    if names is not None:
        # Explicit names
        validate = NameValidator(**validationargs)
        if isinstance(names, str):
            names = names.split(",")
        if dtype.names is None:
            # Simple dtype: repeat to match the nb of names
            formats = tuple([dtype.type] * len(names))
            names = validate(names, defaultfmt=defaultfmt)
            return np.dtype(list(zip(names, formats)))
        # Structured dtype: just validate the names as needed
        dtype.names = validate(names, nbfields=len(dtype.names),
                               defaultfmt=defaultfmt)
        return dtype

    if dtype.names is not None:
        # No names given, but the dtype carries its own
        validate = NameValidator(**validationargs)
        nbfields = len(dtype.names)
        numbered_names = tuple("f%i" % i for i in range(nbfields))
        if (dtype.names == numbered_names) and (defaultfmt != "f%i"):
            # Default initial names with another format requested:
            # regenerate them from empty names
            candidates = [''] * nbfields
        else:
            # Explicit initial names : just validate
            candidates = dtype.names
        dtype.names = validate(candidates, defaultfmt=defaultfmt)
    return dtype