1"""
2This module contains a set of functions for vectorized string
3operations and methods.
4
5.. note::
6 The `chararray` class exists for backwards compatibility with
7 Numarray, it is not recommended for new development. Starting from numpy
8 1.4, if one needs arrays of strings, it is recommended to use arrays of
9 `dtype` `object_`, `bytes_` or `str_`, and use the free functions
10 in the `numpy.char` module for fast vectorized string operations.
11
12Some methods will only be available if the corresponding string method is
13available in your version of Python.
14
15The preferred alias for `defchararray` is `numpy.char`.
16
17"""
18import functools
19
20from .._utils import set_module
21from .numerictypes import (
22 bytes_, str_, integer, int_, object_, bool_, character)
23from .numeric import ndarray, compare_chararrays
24from .numeric import array as narray
25from numpy.core.multiarray import _vec_string
26from numpy.core import overrides
27from numpy.compat import asbytes
28import numpy
29
30__all__ = [
31 'equal', 'not_equal', 'greater_equal', 'less_equal',
32 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
33 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
34 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
35 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
36 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
37 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
38 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
39 'array', 'asarray'
40 ]
41
42
43_globalvar = 0
44
45array_function_dispatch = functools.partial(
46 overrides.array_function_dispatch, module='numpy.char')
47
48
49def _is_unicode(arr):
50 """Returns True if arr is a string or a string array with a dtype that
51 represents a unicode string, otherwise returns False.
52
53 """
54 if (isinstance(arr, str) or
55 issubclass(numpy.asarray(arr).dtype.type, str)):
56 return True
57 return False
58
59
60def _to_bytes_or_str_array(result, output_dtype_like=None):
61 """
62 Helper function to cast a result back into an array
63 with the appropriate dtype if an object array must be used
64 as an intermediary.
65 """
66 ret = numpy.asarray(result.tolist())
67 dtype = getattr(output_dtype_like, 'dtype', None)
68 if dtype is not None:
69 return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False)
70 return ret
71
72
73def _clean_args(*args):
74 """
75 Helper function for delegating arguments to Python string
76 functions.
77
78 Many of the Python string operations that have optional arguments
79 do not use 'None' to indicate a default value. In these cases,
80 we need to remove all None arguments, and those following them.
81 """
82 newargs = []
83 for chk in args:
84 if chk is None:
85 break
86 newargs.append(chk)
87 return newargs
88
89def _get_num_chars(a):
90 """
91 Helper function that returns the number of characters per field in
92 a string or unicode array. This is to abstract out the fact that
93 for a unicode array this is itemsize / 4.
94 """
95 if issubclass(a.dtype.type, str_):
96 return a.itemsize // 4
97 return a.itemsize
98
99
100def _binary_op_dispatcher(x1, x2):
101 return (x1, x2)
102
103
104@array_function_dispatch(_binary_op_dispatcher)
105def equal(x1, x2):
106 """
107 Return (x1 == x2) element-wise.
108
109 Unlike `numpy.equal`, this comparison is performed by first
110 stripping whitespace characters from the end of the string. This
111 behavior is provided for backward-compatibility with numarray.
112
113 Parameters
114 ----------
115 x1, x2 : array_like of str or unicode
116 Input arrays of the same shape.
117
118 Returns
119 -------
120 out : ndarray
121 Output array of bools.
122
123 See Also
124 --------
125 not_equal, greater_equal, less_equal, greater, less
126 """
127 return compare_chararrays(x1, x2, '==', True)
128
129
130@array_function_dispatch(_binary_op_dispatcher)
131def not_equal(x1, x2):
132 """
133 Return (x1 != x2) element-wise.
134
135 Unlike `numpy.not_equal`, this comparison is performed by first
136 stripping whitespace characters from the end of the string. This
137 behavior is provided for backward-compatibility with numarray.
138
139 Parameters
140 ----------
141 x1, x2 : array_like of str or unicode
142 Input arrays of the same shape.
143
144 Returns
145 -------
146 out : ndarray
147 Output array of bools.
148
149 See Also
150 --------
151 equal, greater_equal, less_equal, greater, less
152 """
153 return compare_chararrays(x1, x2, '!=', True)
154
155
156@array_function_dispatch(_binary_op_dispatcher)
157def greater_equal(x1, x2):
158 """
159 Return (x1 >= x2) element-wise.
160
161 Unlike `numpy.greater_equal`, this comparison is performed by
162 first stripping whitespace characters from the end of the string.
163 This behavior is provided for backward-compatibility with
164 numarray.
165
166 Parameters
167 ----------
168 x1, x2 : array_like of str or unicode
169 Input arrays of the same shape.
170
171 Returns
172 -------
173 out : ndarray
174 Output array of bools.
175
176 See Also
177 --------
178 equal, not_equal, less_equal, greater, less
179 """
180 return compare_chararrays(x1, x2, '>=', True)
181
182
183@array_function_dispatch(_binary_op_dispatcher)
184def less_equal(x1, x2):
185 """
186 Return (x1 <= x2) element-wise.
187
188 Unlike `numpy.less_equal`, this comparison is performed by first
189 stripping whitespace characters from the end of the string. This
190 behavior is provided for backward-compatibility with numarray.
191
192 Parameters
193 ----------
194 x1, x2 : array_like of str or unicode
195 Input arrays of the same shape.
196
197 Returns
198 -------
199 out : ndarray
200 Output array of bools.
201
202 See Also
203 --------
204 equal, not_equal, greater_equal, greater, less
205 """
206 return compare_chararrays(x1, x2, '<=', True)
207
208
209@array_function_dispatch(_binary_op_dispatcher)
210def greater(x1, x2):
211 """
212 Return (x1 > x2) element-wise.
213
214 Unlike `numpy.greater`, this comparison is performed by first
215 stripping whitespace characters from the end of the string. This
216 behavior is provided for backward-compatibility with numarray.
217
218 Parameters
219 ----------
220 x1, x2 : array_like of str or unicode
221 Input arrays of the same shape.
222
223 Returns
224 -------
225 out : ndarray
226 Output array of bools.
227
228 See Also
229 --------
230 equal, not_equal, greater_equal, less_equal, less
231 """
232 return compare_chararrays(x1, x2, '>', True)
233
234
235@array_function_dispatch(_binary_op_dispatcher)
236def less(x1, x2):
237 """
238 Return (x1 < x2) element-wise.
239
240 Unlike `numpy.greater`, this comparison is performed by first
241 stripping whitespace characters from the end of the string. This
242 behavior is provided for backward-compatibility with numarray.
243
244 Parameters
245 ----------
246 x1, x2 : array_like of str or unicode
247 Input arrays of the same shape.
248
249 Returns
250 -------
251 out : ndarray
252 Output array of bools.
253
254 See Also
255 --------
256 equal, not_equal, greater_equal, less_equal, greater
257 """
258 return compare_chararrays(x1, x2, '<', True)
259
260
261def _unary_op_dispatcher(a):
262 return (a,)
263
264
265@array_function_dispatch(_unary_op_dispatcher)
266def str_len(a):
267 """
268 Return len(a) element-wise.
269
270 Parameters
271 ----------
272 a : array_like of str or unicode
273
274 Returns
275 -------
276 out : ndarray
277 Output array of integers
278
279 See Also
280 --------
281 len
282
283 Examples
284 --------
285 >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
286 >>> np.char.str_len(a)
287 array([23, 15])
288 >>> a = np.array([u'\u0420', u'\u043e'])
289 >>> np.char.str_len(a)
290 array([1, 1])
291 >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
292 >>> np.char.str_len(a)
293 array([[5, 5], [1, 1]])
294 """
295 # Note: __len__, etc. currently return ints, which are not C-integers.
296 # Generally intp would be expected for lengths, although int is sufficient
297 # due to the dtype itemsize limitation.
298 return _vec_string(a, int_, '__len__')
299
300
301@array_function_dispatch(_binary_op_dispatcher)
302def add(x1, x2):
303 """
304 Return element-wise string concatenation for two arrays of str or unicode.
305
306 Arrays `x1` and `x2` must have the same shape.
307
308 Parameters
309 ----------
310 x1 : array_like of str or unicode
311 Input array.
312 x2 : array_like of str or unicode
313 Input array.
314
315 Returns
316 -------
317 add : ndarray
318 Output array of `bytes_` or `str_`, depending on input types
319 of the same shape as `x1` and `x2`.
320
321 """
322 arr1 = numpy.asarray(x1)
323 arr2 = numpy.asarray(x2)
324 out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
325
326 if type(arr1.dtype) != type(arr2.dtype):
327 # Enforce this for now. The solution to it will be implement add
328 # as a ufunc. It never worked right on Python 3: bytes + unicode gave
329 # nonsense unicode + bytes errored, and unicode + object used the
330 # object dtype itemsize as num chars (worked on short strings).
331 # bytes + void worked but promoting void->bytes is dubious also.
332 raise TypeError(
333 "np.char.add() requires both arrays of the same dtype kind, but "
334 f"got dtypes: '{arr1.dtype}' and '{arr2.dtype}' (the few cases "
335 "where this used to work often lead to incorrect results).")
336
337 return _vec_string(arr1, type(arr1.dtype)(out_size), '__add__', (arr2,))
338
339def _multiply_dispatcher(a, i):
340 return (a,)
341
342
343@array_function_dispatch(_multiply_dispatcher)
344def multiply(a, i):
345 """
346 Return (a * i), that is string multiple concatenation,
347 element-wise.
348
349 Values in `i` of less than 0 are treated as 0 (which yields an
350 empty string).
351
352 Parameters
353 ----------
354 a : array_like of str or unicode
355
356 i : array_like of ints
357
358 Returns
359 -------
360 out : ndarray
361 Output array of str or unicode, depending on input types
362
363 Examples
364 --------
365 >>> a = np.array(["a", "b", "c"])
366 >>> np.char.multiply(x, 3)
367 array(['aaa', 'bbb', 'ccc'], dtype='<U3')
368 >>> i = np.array([1, 2, 3])
369 >>> np.char.multiply(a, i)
370 array(['a', 'bb', 'ccc'], dtype='<U3')
371 >>> np.char.multiply(np.array(['a']), i)
372 array(['a', 'aa', 'aaa'], dtype='<U3')
373 >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
374 >>> np.char.multiply(a, 3)
375 array([['aaa', 'bbb', 'ccc'],
376 ['ddd', 'eee', 'fff']], dtype='<U3')
377 >>> np.char.multiply(a, i)
378 array([['a', 'bb', 'ccc'],
379 ['d', 'ee', 'fff']], dtype='<U3')
380 """
381 a_arr = numpy.asarray(a)
382 i_arr = numpy.asarray(i)
383 if not issubclass(i_arr.dtype.type, integer):
384 raise ValueError("Can only multiply by integers")
385 out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
386 return _vec_string(
387 a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))
388
389
390def _mod_dispatcher(a, values):
391 return (a, values)
392
393
394@array_function_dispatch(_mod_dispatcher)
395def mod(a, values):
396 """
397 Return (a % i), that is pre-Python 2.6 string formatting
398 (interpolation), element-wise for a pair of array_likes of str
399 or unicode.
400
401 Parameters
402 ----------
403 a : array_like of str or unicode
404
405 values : array_like of values
406 These values will be element-wise interpolated into the string.
407
408 Returns
409 -------
410 out : ndarray
411 Output array of str or unicode, depending on input types
412
413 See Also
414 --------
415 str.__mod__
416
417 """
418 return _to_bytes_or_str_array(
419 _vec_string(a, object_, '__mod__', (values,)), a)
420
421
422@array_function_dispatch(_unary_op_dispatcher)
423def capitalize(a):
424 """
425 Return a copy of `a` with only the first character of each element
426 capitalized.
427
428 Calls `str.capitalize` element-wise.
429
430 For 8-bit strings, this method is locale-dependent.
431
432 Parameters
433 ----------
434 a : array_like of str or unicode
435 Input array of strings to capitalize.
436
437 Returns
438 -------
439 out : ndarray
440 Output array of str or unicode, depending on input
441 types
442
443 See Also
444 --------
445 str.capitalize
446
447 Examples
448 --------
449 >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
450 array(['a1b2', '1b2a', 'b2a1', '2a1b'],
451 dtype='|S4')
452 >>> np.char.capitalize(c)
453 array(['A1b2', '1b2a', 'B2a1', '2a1b'],
454 dtype='|S4')
455
456 """
457 a_arr = numpy.asarray(a)
458 return _vec_string(a_arr, a_arr.dtype, 'capitalize')
459
460
461def _center_dispatcher(a, width, fillchar=None):
462 return (a,)
463
464
465@array_function_dispatch(_center_dispatcher)
466def center(a, width, fillchar=' '):
467 """
468 Return a copy of `a` with its elements centered in a string of
469 length `width`.
470
471 Calls `str.center` element-wise.
472
473 Parameters
474 ----------
475 a : array_like of str or unicode
476
477 width : int
478 The length of the resulting strings
479 fillchar : str or unicode, optional
480 The padding character to use (default is space).
481
482 Returns
483 -------
484 out : ndarray
485 Output array of str or unicode, depending on input
486 types
487
488 See Also
489 --------
490 str.center
491
492 Notes
493 -----
494 This function is intended to work with arrays of strings. The
495 fill character is not applied to numeric types.
496
497 Examples
498 --------
499 >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
500 array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
501 >>> np.char.center(c, width=9)
502 array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9')
503 >>> np.char.center(c, width=9, fillchar='*')
504 array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
505 >>> np.char.center(c, width=1)
506 array(['a', '1', 'b', '2'], dtype='<U1')
507
508 """
509 a_arr = numpy.asarray(a)
510 width_arr = numpy.asarray(width)
511 size = int(numpy.max(width_arr.flat))
512 if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
513 fillchar = asbytes(fillchar)
514 return _vec_string(
515 a_arr, type(a_arr.dtype)(size), 'center', (width_arr, fillchar))
516
517
518def _count_dispatcher(a, sub, start=None, end=None):
519 return (a,)
520
521
522@array_function_dispatch(_count_dispatcher)
523def count(a, sub, start=0, end=None):
524 """
525 Returns an array with the number of non-overlapping occurrences of
526 substring `sub` in the range [`start`, `end`].
527
528 Calls `str.count` element-wise.
529
530 Parameters
531 ----------
532 a : array_like of str or unicode
533
534 sub : str or unicode
535 The substring to search for.
536
537 start, end : int, optional
538 Optional arguments `start` and `end` are interpreted as slice
539 notation to specify the range in which to count.
540
541 Returns
542 -------
543 out : ndarray
544 Output array of ints.
545
546 See Also
547 --------
548 str.count
549
550 Examples
551 --------
552 >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
553 >>> c
554 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
555 >>> np.char.count(c, 'A')
556 array([3, 1, 1])
557 >>> np.char.count(c, 'aA')
558 array([3, 1, 0])
559 >>> np.char.count(c, 'A', start=1, end=4)
560 array([2, 1, 1])
561 >>> np.char.count(c, 'A', start=1, end=3)
562 array([1, 0, 0])
563
564 """
565 return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
566
567
568def _code_dispatcher(a, encoding=None, errors=None):
569 return (a,)
570
571
572@array_function_dispatch(_code_dispatcher)
573def decode(a, encoding=None, errors=None):
574 r"""
575 Calls ``bytes.decode`` element-wise.
576
577 The set of available codecs comes from the Python standard library,
578 and may be extended at runtime. For more information, see the
579 :mod:`codecs` module.
580
581 Parameters
582 ----------
583 a : array_like of str or unicode
584
585 encoding : str, optional
586 The name of an encoding
587
588 errors : str, optional
589 Specifies how to handle encoding errors
590
591 Returns
592 -------
593 out : ndarray
594
595 See Also
596 --------
597 :py:meth:`bytes.decode`
598
599 Notes
600 -----
601 The type of the result will depend on the encoding specified.
602
603 Examples
604 --------
605 >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
606 ... b'\x81\x82\xc2\xc1\xc2\x82\x81'])
607 >>> c
608 array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
609 ... b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
610 >>> np.char.decode(c, encoding='cp037')
611 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
612
613 """
614 return _to_bytes_or_str_array(
615 _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
616
617
618@array_function_dispatch(_code_dispatcher)
619def encode(a, encoding=None, errors=None):
620 """
621 Calls `str.encode` element-wise.
622
623 The set of available codecs comes from the Python standard library,
624 and may be extended at runtime. For more information, see the codecs
625 module.
626
627 Parameters
628 ----------
629 a : array_like of str or unicode
630
631 encoding : str, optional
632 The name of an encoding
633
634 errors : str, optional
635 Specifies how to handle encoding errors
636
637 Returns
638 -------
639 out : ndarray
640
641 See Also
642 --------
643 str.encode
644
645 Notes
646 -----
647 The type of the result will depend on the encoding specified.
648
649 """
650 return _to_bytes_or_str_array(
651 _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
652
653
654def _endswith_dispatcher(a, suffix, start=None, end=None):
655 return (a,)
656
657
658@array_function_dispatch(_endswith_dispatcher)
659def endswith(a, suffix, start=0, end=None):
660 """
661 Returns a boolean array which is `True` where the string element
662 in `a` ends with `suffix`, otherwise `False`.
663
664 Calls `str.endswith` element-wise.
665
666 Parameters
667 ----------
668 a : array_like of str or unicode
669
670 suffix : str
671
672 start, end : int, optional
673 With optional `start`, test beginning at that position. With
674 optional `end`, stop comparing at that position.
675
676 Returns
677 -------
678 out : ndarray
679 Outputs an array of bools.
680
681 See Also
682 --------
683 str.endswith
684
685 Examples
686 --------
687 >>> s = np.array(['foo', 'bar'])
688 >>> s[0] = 'foo'
689 >>> s[1] = 'bar'
690 >>> s
691 array(['foo', 'bar'], dtype='<U3')
692 >>> np.char.endswith(s, 'ar')
693 array([False, True])
694 >>> np.char.endswith(s, 'a', start=1, end=2)
695 array([False, True])
696
697 """
698 return _vec_string(
699 a, bool_, 'endswith', [suffix, start] + _clean_args(end))
700
701
702def _expandtabs_dispatcher(a, tabsize=None):
703 return (a,)
704
705
706@array_function_dispatch(_expandtabs_dispatcher)
707def expandtabs(a, tabsize=8):
708 """
709 Return a copy of each string element where all tab characters are
710 replaced by one or more spaces.
711
712 Calls `str.expandtabs` element-wise.
713
714 Return a copy of each string element where all tab characters are
715 replaced by one or more spaces, depending on the current column
716 and the given `tabsize`. The column number is reset to zero after
717 each newline occurring in the string. This doesn't understand other
718 non-printing characters or escape sequences.
719
720 Parameters
721 ----------
722 a : array_like of str or unicode
723 Input array
724 tabsize : int, optional
725 Replace tabs with `tabsize` number of spaces. If not given defaults
726 to 8 spaces.
727
728 Returns
729 -------
730 out : ndarray
731 Output array of str or unicode, depending on input type
732
733 See Also
734 --------
735 str.expandtabs
736
737 """
738 return _to_bytes_or_str_array(
739 _vec_string(a, object_, 'expandtabs', (tabsize,)), a)
740
741
742@array_function_dispatch(_count_dispatcher)
743def find(a, sub, start=0, end=None):
744 """
745 For each element, return the lowest index in the string where
746 substring `sub` is found.
747
748 Calls `str.find` element-wise.
749
750 For each element, return the lowest index in the string where
751 substring `sub` is found, such that `sub` is contained in the
752 range [`start`, `end`].
753
754 Parameters
755 ----------
756 a : array_like of str or unicode
757
758 sub : str or unicode
759
760 start, end : int, optional
761 Optional arguments `start` and `end` are interpreted as in
762 slice notation.
763
764 Returns
765 -------
766 out : ndarray or int
767 Output array of ints. Returns -1 if `sub` is not found.
768
769 See Also
770 --------
771 str.find
772
773 Examples
774 --------
775 >>> a = np.array(["NumPy is a Python library"])
776 >>> np.char.find(a, "Python", start=0, end=None)
777 array([11])
778
779 """
780 return _vec_string(
781 a, int_, 'find', [sub, start] + _clean_args(end))
782
783
784@array_function_dispatch(_count_dispatcher)
785def index(a, sub, start=0, end=None):
786 """
787 Like `find`, but raises `ValueError` when the substring is not found.
788
789 Calls `str.index` element-wise.
790
791 Parameters
792 ----------
793 a : array_like of str or unicode
794
795 sub : str or unicode
796
797 start, end : int, optional
798
799 Returns
800 -------
801 out : ndarray
802 Output array of ints. Returns -1 if `sub` is not found.
803
804 See Also
805 --------
806 find, str.find
807
808 Examples
809 --------
810 >>> a = np.array(["Computer Science"])
811 >>> np.char.index(a, "Science", start=0, end=None)
812 array([9])
813
814 """
815 return _vec_string(
816 a, int_, 'index', [sub, start] + _clean_args(end))
817
818
819@array_function_dispatch(_unary_op_dispatcher)
820def isalnum(a):
821 """
822 Returns true for each element if all characters in the string are
823 alphanumeric and there is at least one character, false otherwise.
824
825 Calls `str.isalnum` element-wise.
826
827 For 8-bit strings, this method is locale-dependent.
828
829 Parameters
830 ----------
831 a : array_like of str or unicode
832
833 Returns
834 -------
835 out : ndarray
836 Output array of str or unicode, depending on input type
837
838 See Also
839 --------
840 str.isalnum
841 """
842 return _vec_string(a, bool_, 'isalnum')
843
844
845@array_function_dispatch(_unary_op_dispatcher)
846def isalpha(a):
847 """
848 Returns true for each element if all characters in the string are
849 alphabetic and there is at least one character, false otherwise.
850
851 Calls `str.isalpha` element-wise.
852
853 For 8-bit strings, this method is locale-dependent.
854
855 Parameters
856 ----------
857 a : array_like of str or unicode
858
859 Returns
860 -------
861 out : ndarray
862 Output array of bools
863
864 See Also
865 --------
866 str.isalpha
867 """
868 return _vec_string(a, bool_, 'isalpha')
869
870
871@array_function_dispatch(_unary_op_dispatcher)
872def isdigit(a):
873 """
874 Returns true for each element if all characters in the string are
875 digits and there is at least one character, false otherwise.
876
877 Calls `str.isdigit` element-wise.
878
879 For 8-bit strings, this method is locale-dependent.
880
881 Parameters
882 ----------
883 a : array_like of str or unicode
884
885 Returns
886 -------
887 out : ndarray
888 Output array of bools
889
890 See Also
891 --------
892 str.isdigit
893
894 Examples
895 --------
896 >>> a = np.array(['a', 'b', '0'])
897 >>> np.char.isdigit(a)
898 array([False, False, True])
899 >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
900 >>> np.char.isdigit(a)
901 array([[False, False, True], [False, True, True]])
902 """
903 return _vec_string(a, bool_, 'isdigit')
904
905
906@array_function_dispatch(_unary_op_dispatcher)
907def islower(a):
908 """
909 Returns true for each element if all cased characters in the
910 string are lowercase and there is at least one cased character,
911 false otherwise.
912
913 Calls `str.islower` element-wise.
914
915 For 8-bit strings, this method is locale-dependent.
916
917 Parameters
918 ----------
919 a : array_like of str or unicode
920
921 Returns
922 -------
923 out : ndarray
924 Output array of bools
925
926 See Also
927 --------
928 str.islower
929 """
930 return _vec_string(a, bool_, 'islower')
931
932
933@array_function_dispatch(_unary_op_dispatcher)
934def isspace(a):
935 """
936 Returns true for each element if there are only whitespace
937 characters in the string and there is at least one character,
938 false otherwise.
939
940 Calls `str.isspace` element-wise.
941
942 For 8-bit strings, this method is locale-dependent.
943
944 Parameters
945 ----------
946 a : array_like of str or unicode
947
948 Returns
949 -------
950 out : ndarray
951 Output array of bools
952
953 See Also
954 --------
955 str.isspace
956 """
957 return _vec_string(a, bool_, 'isspace')
958
959
960@array_function_dispatch(_unary_op_dispatcher)
961def istitle(a):
962 """
963 Returns true for each element if the element is a titlecased
964 string and there is at least one character, false otherwise.
965
966 Call `str.istitle` element-wise.
967
968 For 8-bit strings, this method is locale-dependent.
969
970 Parameters
971 ----------
972 a : array_like of str or unicode
973
974 Returns
975 -------
976 out : ndarray
977 Output array of bools
978
979 See Also
980 --------
981 str.istitle
982 """
983 return _vec_string(a, bool_, 'istitle')
984
985
986@array_function_dispatch(_unary_op_dispatcher)
987def isupper(a):
988 """
989 Return true for each element if all cased characters in the
990 string are uppercase and there is at least one character, false
991 otherwise.
992
993 Call `str.isupper` element-wise.
994
995 For 8-bit strings, this method is locale-dependent.
996
997 Parameters
998 ----------
999 a : array_like of str or unicode
1000
1001 Returns
1002 -------
1003 out : ndarray
1004 Output array of bools
1005
1006 See Also
1007 --------
1008 str.isupper
1009
1010 Examples
1011 --------
1012 >>> str = "GHC"
1013 >>> np.char.isupper(str)
1014 array(True)
1015 >>> a = np.array(["hello", "HELLO", "Hello"])
1016 >>> np.char.isupper(a)
1017 array([False, True, False])
1018
1019 """
1020 return _vec_string(a, bool_, 'isupper')
1021
1022
1023def _join_dispatcher(sep, seq):
1024 return (sep, seq)
1025
1026
1027@array_function_dispatch(_join_dispatcher)
1028def join(sep, seq):
1029 """
1030 Return a string which is the concatenation of the strings in the
1031 sequence `seq`.
1032
1033 Calls `str.join` element-wise.
1034
1035 Parameters
1036 ----------
1037 sep : array_like of str or unicode
1038 seq : array_like of str or unicode
1039
1040 Returns
1041 -------
1042 out : ndarray
1043 Output array of str or unicode, depending on input types
1044
1045 See Also
1046 --------
1047 str.join
1048
1049 Examples
1050 --------
1051 >>> np.char.join('-', 'osd')
1052 array('o-s-d', dtype='<U5')
1053
1054 >>> np.char.join(['-', '.'], ['ghc', 'osd'])
1055 array(['g-h-c', 'o.s.d'], dtype='<U5')
1056
1057 """
1058 return _to_bytes_or_str_array(
1059 _vec_string(sep, object_, 'join', (seq,)), seq)
1060
1061
1062
1063def _just_dispatcher(a, width, fillchar=None):
1064 return (a,)
1065
1066
1067@array_function_dispatch(_just_dispatcher)
1068def ljust(a, width, fillchar=' '):
1069 """
1070 Return an array with the elements of `a` left-justified in a
1071 string of length `width`.
1072
1073 Calls `str.ljust` element-wise.
1074
1075 Parameters
1076 ----------
1077 a : array_like of str or unicode
1078
1079 width : int
1080 The length of the resulting strings
1081 fillchar : str or unicode, optional
1082 The character to use for padding
1083
1084 Returns
1085 -------
1086 out : ndarray
1087 Output array of str or unicode, depending on input type
1088
1089 See Also
1090 --------
1091 str.ljust
1092
1093 """
1094 a_arr = numpy.asarray(a)
1095 width_arr = numpy.asarray(width)
1096 size = int(numpy.max(width_arr.flat))
1097 if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
1098 fillchar = asbytes(fillchar)
1099 return _vec_string(
1100 a_arr, type(a_arr.dtype)(size), 'ljust', (width_arr, fillchar))
1101
1102
1103@array_function_dispatch(_unary_op_dispatcher)
1104def lower(a):
1105 """
1106 Return an array with the elements converted to lowercase.
1107
1108 Call `str.lower` element-wise.
1109
1110 For 8-bit strings, this method is locale-dependent.
1111
1112 Parameters
1113 ----------
1114 a : array_like, {str, unicode}
1115 Input array.
1116
1117 Returns
1118 -------
1119 out : ndarray, {str, unicode}
1120 Output array of str or unicode, depending on input type
1121
1122 See Also
1123 --------
1124 str.lower
1125
1126 Examples
1127 --------
1128 >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
1129 array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
1130 >>> np.char.lower(c)
1131 array(['a1b c', '1bca', 'bca1'], dtype='<U5')
1132
1133 """
1134 a_arr = numpy.asarray(a)
1135 return _vec_string(a_arr, a_arr.dtype, 'lower')
1136
1137
1138def _strip_dispatcher(a, chars=None):
1139 return (a,)
1140
1141
1142@array_function_dispatch(_strip_dispatcher)
1143def lstrip(a, chars=None):
1144 """
1145 For each element in `a`, return a copy with the leading characters
1146 removed.
1147
1148 Calls `str.lstrip` element-wise.
1149
1150 Parameters
1151 ----------
1152 a : array-like, {str, unicode}
1153 Input array.
1154
1155 chars : {str, unicode}, optional
1156 The `chars` argument is a string specifying the set of
1157 characters to be removed. If omitted or None, the `chars`
1158 argument defaults to removing whitespace. The `chars` argument
1159 is not a prefix; rather, all combinations of its values are
1160 stripped.
1161
1162 Returns
1163 -------
1164 out : ndarray, {str, unicode}
1165 Output array of str or unicode, depending on input type
1166
1167 See Also
1168 --------
1169 str.lstrip
1170
1171 Examples
1172 --------
1173 >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
1174 >>> c
1175 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
1176
1177 The 'a' variable is unstripped from c[1] because whitespace leading.
1178
1179 >>> np.char.lstrip(c, 'a')
1180 array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
1181
1182
1183 >>> np.char.lstrip(c, 'A') # leaves c unchanged
1184 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
1185 >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
1186 ... # XXX: is this a regression? This used to return True
1187 ... # np.char.lstrip(c,'') does not modify c at all.
1188 False
1189 >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
1190 True
1191
1192 """
1193 a_arr = numpy.asarray(a)
1194 return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
1195
1196
1197def _partition_dispatcher(a, sep):
1198 return (a,)
1199
1200
1201@array_function_dispatch(_partition_dispatcher)
1202def partition(a, sep):
1203 """
1204 Partition each element in `a` around `sep`.
1205
1206 Calls `str.partition` element-wise.
1207
1208 For each element in `a`, split the element as the first
1209 occurrence of `sep`, and return 3 strings containing the part
1210 before the separator, the separator itself, and the part after
1211 the separator. If the separator is not found, return 3 strings
1212 containing the string itself, followed by two empty strings.
1213
1214 Parameters
1215 ----------
1216 a : array_like, {str, unicode}
1217 Input array
1218 sep : {str, unicode}
1219 Separator to split each string element in `a`.
1220
1221 Returns
1222 -------
1223 out : ndarray, {str, unicode}
1224 Output array of str or unicode, depending on input type.
1225 The output array will have an extra dimension with 3
1226 elements per input element.
1227
1228 See Also
1229 --------
1230 str.partition
1231
1232 """
1233 return _to_bytes_or_str_array(
1234 _vec_string(a, object_, 'partition', (sep,)), a)
1235
1236
1237def _replace_dispatcher(a, old, new, count=None):
1238 return (a,)
1239
1240
1241@array_function_dispatch(_replace_dispatcher)
1242def replace(a, old, new, count=None):
1243 """
1244 For each element in `a`, return a copy of the string with all
1245 occurrences of substring `old` replaced by `new`.
1246
1247 Calls `str.replace` element-wise.
1248
1249 Parameters
1250 ----------
1251 a : array-like of str or unicode
1252
1253 old, new : str or unicode
1254
1255 count : int, optional
1256 If the optional argument `count` is given, only the first
1257 `count` occurrences are replaced.
1258
1259 Returns
1260 -------
1261 out : ndarray
1262 Output array of str or unicode, depending on input type
1263
1264 See Also
1265 --------
1266 str.replace
1267
1268 Examples
1269 --------
1270 >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
1271 >>> np.char.replace(a, 'mango', 'banana')
1272 array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')
1273
1274 >>> a = np.array(["The dish is fresh", "This is it"])
1275 >>> np.char.replace(a, 'is', 'was')
1276 array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
1277 """
1278 return _to_bytes_or_str_array(
1279 _vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a)
1280
1281
1282@array_function_dispatch(_count_dispatcher)
1283def rfind(a, sub, start=0, end=None):
1284 """
1285 For each element in `a`, return the highest index in the string
1286 where substring `sub` is found, such that `sub` is contained
1287 within [`start`, `end`].
1288
1289 Calls `str.rfind` element-wise.
1290
1291 Parameters
1292 ----------
1293 a : array-like of str or unicode
1294
1295 sub : str or unicode
1296
1297 start, end : int, optional
1298 Optional arguments `start` and `end` are interpreted as in
1299 slice notation.
1300
1301 Returns
1302 -------
1303 out : ndarray
1304 Output array of ints. Return -1 on failure.
1305
1306 See Also
1307 --------
1308 str.rfind
1309
1310 """
1311 return _vec_string(
1312 a, int_, 'rfind', [sub, start] + _clean_args(end))
1313
1314
1315@array_function_dispatch(_count_dispatcher)
1316def rindex(a, sub, start=0, end=None):
1317 """
1318 Like `rfind`, but raises `ValueError` when the substring `sub` is
1319 not found.
1320
1321 Calls `str.rindex` element-wise.
1322
1323 Parameters
1324 ----------
1325 a : array-like of str or unicode
1326
1327 sub : str or unicode
1328
1329 start, end : int, optional
1330
1331 Returns
1332 -------
1333 out : ndarray
1334 Output array of ints.
1335
1336 See Also
1337 --------
1338 rfind, str.rindex
1339
1340 """
1341 return _vec_string(
1342 a, int_, 'rindex', [sub, start] + _clean_args(end))
1343
1344
1345@array_function_dispatch(_just_dispatcher)
1346def rjust(a, width, fillchar=' '):
1347 """
1348 Return an array with the elements of `a` right-justified in a
1349 string of length `width`.
1350
1351 Calls `str.rjust` element-wise.
1352
1353 Parameters
1354 ----------
1355 a : array_like of str or unicode
1356
1357 width : int
1358 The length of the resulting strings
1359 fillchar : str or unicode, optional
1360 The character to use for padding
1361
1362 Returns
1363 -------
1364 out : ndarray
1365 Output array of str or unicode, depending on input type
1366
1367 See Also
1368 --------
1369 str.rjust
1370
1371 """
1372 a_arr = numpy.asarray(a)
1373 width_arr = numpy.asarray(width)
1374 size = int(numpy.max(width_arr.flat))
1375 if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
1376 fillchar = asbytes(fillchar)
1377 return _vec_string(
1378 a_arr, type(a_arr.dtype)(size), 'rjust', (width_arr, fillchar))
1379
1380
1381@array_function_dispatch(_partition_dispatcher)
1382def rpartition(a, sep):
1383 """
1384 Partition (split) each element around the right-most separator.
1385
1386 Calls `str.rpartition` element-wise.
1387
1388 For each element in `a`, split the element as the last
1389 occurrence of `sep`, and return 3 strings containing the part
1390 before the separator, the separator itself, and the part after
1391 the separator. If the separator is not found, return 3 strings
1392 containing the string itself, followed by two empty strings.
1393
1394 Parameters
1395 ----------
1396 a : array_like of str or unicode
1397 Input array
1398 sep : str or unicode
1399 Right-most separator to split each element in array.
1400
1401 Returns
1402 -------
1403 out : ndarray
1404 Output array of string or unicode, depending on input
1405 type. The output array will have an extra dimension with
1406 3 elements per input element.
1407
1408 See Also
1409 --------
1410 str.rpartition
1411
1412 """
1413 return _to_bytes_or_str_array(
1414 _vec_string(a, object_, 'rpartition', (sep,)), a)
1415
1416
1417def _split_dispatcher(a, sep=None, maxsplit=None):
1418 return (a,)
1419
1420
1421@array_function_dispatch(_split_dispatcher)
1422def rsplit(a, sep=None, maxsplit=None):
1423 """
1424 For each element in `a`, return a list of the words in the
1425 string, using `sep` as the delimiter string.
1426
1427 Calls `str.rsplit` element-wise.
1428
1429 Except for splitting from the right, `rsplit`
1430 behaves like `split`.
1431
1432 Parameters
1433 ----------
1434 a : array_like of str or unicode
1435
1436 sep : str or unicode, optional
1437 If `sep` is not specified or None, any whitespace string
1438 is a separator.
1439 maxsplit : int, optional
1440 If `maxsplit` is given, at most `maxsplit` splits are done,
1441 the rightmost ones.
1442
1443 Returns
1444 -------
1445 out : ndarray
1446 Array of list objects
1447
1448 See Also
1449 --------
1450 str.rsplit, split
1451
1452 """
1453 # This will return an array of lists of different sizes, so we
1454 # leave it as an object array
1455 return _vec_string(
1456 a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
1457
1458
1459def _strip_dispatcher(a, chars=None):
1460 return (a,)
1461
1462
1463@array_function_dispatch(_strip_dispatcher)
1464def rstrip(a, chars=None):
1465 """
1466 For each element in `a`, return a copy with the trailing
1467 characters removed.
1468
1469 Calls `str.rstrip` element-wise.
1470
1471 Parameters
1472 ----------
1473 a : array-like of str or unicode
1474
1475 chars : str or unicode, optional
1476 The `chars` argument is a string specifying the set of
1477 characters to be removed. If omitted or None, the `chars`
1478 argument defaults to removing whitespace. The `chars` argument
1479 is not a suffix; rather, all combinations of its values are
1480 stripped.
1481
1482 Returns
1483 -------
1484 out : ndarray
1485 Output array of str or unicode, depending on input type
1486
1487 See Also
1488 --------
1489 str.rstrip
1490
1491 Examples
1492 --------
1493 >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
1494 array(['aAaAaA', 'abBABba'],
1495 dtype='|S7')
1496 >>> np.char.rstrip(c, b'a')
1497 array(['aAaAaA', 'abBABb'],
1498 dtype='|S7')
1499 >>> np.char.rstrip(c, b'A')
1500 array(['aAaAa', 'abBABba'],
1501 dtype='|S7')
1502
1503 """
1504 a_arr = numpy.asarray(a)
1505 return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
1506
1507
1508@array_function_dispatch(_split_dispatcher)
1509def split(a, sep=None, maxsplit=None):
1510 """
1511 For each element in `a`, return a list of the words in the
1512 string, using `sep` as the delimiter string.
1513
1514 Calls `str.split` element-wise.
1515
1516 Parameters
1517 ----------
1518 a : array_like of str or unicode
1519
1520 sep : str or unicode, optional
1521 If `sep` is not specified or None, any whitespace string is a
1522 separator.
1523
1524 maxsplit : int, optional
1525 If `maxsplit` is given, at most `maxsplit` splits are done.
1526
1527 Returns
1528 -------
1529 out : ndarray
1530 Array of list objects
1531
1532 See Also
1533 --------
1534 str.split, rsplit
1535
1536 """
1537 # This will return an array of lists of different sizes, so we
1538 # leave it as an object array
1539 return _vec_string(
1540 a, object_, 'split', [sep] + _clean_args(maxsplit))
1541
1542
1543def _splitlines_dispatcher(a, keepends=None):
1544 return (a,)
1545
1546
1547@array_function_dispatch(_splitlines_dispatcher)
1548def splitlines(a, keepends=None):
1549 """
1550 For each element in `a`, return a list of the lines in the
1551 element, breaking at line boundaries.
1552
1553 Calls `str.splitlines` element-wise.
1554
1555 Parameters
1556 ----------
1557 a : array_like of str or unicode
1558
1559 keepends : bool, optional
1560 Line breaks are not included in the resulting list unless
1561 keepends is given and true.
1562
1563 Returns
1564 -------
1565 out : ndarray
1566 Array of list objects
1567
1568 See Also
1569 --------
1570 str.splitlines
1571
1572 """
1573 return _vec_string(
1574 a, object_, 'splitlines', _clean_args(keepends))
1575
1576
1577def _startswith_dispatcher(a, prefix, start=None, end=None):
1578 return (a,)
1579
1580
1581@array_function_dispatch(_startswith_dispatcher)
1582def startswith(a, prefix, start=0, end=None):
1583 """
1584 Returns a boolean array which is `True` where the string element
1585 in `a` starts with `prefix`, otherwise `False`.
1586
1587 Calls `str.startswith` element-wise.
1588
1589 Parameters
1590 ----------
1591 a : array_like of str or unicode
1592
1593 prefix : str
1594
1595 start, end : int, optional
1596 With optional `start`, test beginning at that position. With
1597 optional `end`, stop comparing at that position.
1598
1599 Returns
1600 -------
1601 out : ndarray
1602 Array of booleans
1603
1604 See Also
1605 --------
1606 str.startswith
1607
1608 """
1609 return _vec_string(
1610 a, bool_, 'startswith', [prefix, start] + _clean_args(end))
1611
1612
1613@array_function_dispatch(_strip_dispatcher)
1614def strip(a, chars=None):
1615 """
1616 For each element in `a`, return a copy with the leading and
1617 trailing characters removed.
1618
1619 Calls `str.strip` element-wise.
1620
1621 Parameters
1622 ----------
1623 a : array-like of str or unicode
1624
1625 chars : str or unicode, optional
1626 The `chars` argument is a string specifying the set of
1627 characters to be removed. If omitted or None, the `chars`
1628 argument defaults to removing whitespace. The `chars` argument
1629 is not a prefix or suffix; rather, all combinations of its
1630 values are stripped.
1631
1632 Returns
1633 -------
1634 out : ndarray
1635 Output array of str or unicode, depending on input type
1636
1637 See Also
1638 --------
1639 str.strip
1640
1641 Examples
1642 --------
1643 >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
1644 >>> c
1645 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
1646 >>> np.char.strip(c)
1647 array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
1648 >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
1649 array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
1650 >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
1651 array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')
1652
1653 """
1654 a_arr = numpy.asarray(a)
1655 return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
1656
1657
1658@array_function_dispatch(_unary_op_dispatcher)
1659def swapcase(a):
1660 """
1661 Return element-wise a copy of the string with
1662 uppercase characters converted to lowercase and vice versa.
1663
1664 Calls `str.swapcase` element-wise.
1665
1666 For 8-bit strings, this method is locale-dependent.
1667
1668 Parameters
1669 ----------
1670 a : array_like, {str, unicode}
1671 Input array.
1672
1673 Returns
1674 -------
1675 out : ndarray, {str, unicode}
1676 Output array of str or unicode, depending on input type
1677
1678 See Also
1679 --------
1680 str.swapcase
1681
1682 Examples
1683 --------
1684 >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
1685 array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
1686 dtype='|S5')
1687 >>> np.char.swapcase(c)
1688 array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
1689 dtype='|S5')
1690
1691 """
1692 a_arr = numpy.asarray(a)
1693 return _vec_string(a_arr, a_arr.dtype, 'swapcase')
1694
1695
1696@array_function_dispatch(_unary_op_dispatcher)
1697def title(a):
1698 """
1699 Return element-wise title cased version of string or unicode.
1700
1701 Title case words start with uppercase characters, all remaining cased
1702 characters are lowercase.
1703
1704 Calls `str.title` element-wise.
1705
1706 For 8-bit strings, this method is locale-dependent.
1707
1708 Parameters
1709 ----------
1710 a : array_like, {str, unicode}
1711 Input array.
1712
1713 Returns
1714 -------
1715 out : ndarray
1716 Output array of str or unicode, depending on input type
1717
1718 See Also
1719 --------
1720 str.title
1721
1722 Examples
1723 --------
1724 >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
1725 array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
1726 dtype='|S5')
1727 >>> np.char.title(c)
1728 array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
1729 dtype='|S5')
1730
1731 """
1732 a_arr = numpy.asarray(a)
1733 return _vec_string(a_arr, a_arr.dtype, 'title')
1734
1735
1736def _translate_dispatcher(a, table, deletechars=None):
1737 return (a,)
1738
1739
1740@array_function_dispatch(_translate_dispatcher)
1741def translate(a, table, deletechars=None):
1742 """
1743 For each element in `a`, return a copy of the string where all
1744 characters occurring in the optional argument `deletechars` are
1745 removed, and the remaining characters have been mapped through the
1746 given translation table.
1747
1748 Calls `str.translate` element-wise.
1749
1750 Parameters
1751 ----------
1752 a : array-like of str or unicode
1753
1754 table : str of length 256
1755
1756 deletechars : str
1757
1758 Returns
1759 -------
1760 out : ndarray
1761 Output array of str or unicode, depending on input type
1762
1763 See Also
1764 --------
1765 str.translate
1766
1767 """
1768 a_arr = numpy.asarray(a)
1769 if issubclass(a_arr.dtype.type, str_):
1770 return _vec_string(
1771 a_arr, a_arr.dtype, 'translate', (table,))
1772 else:
1773 return _vec_string(
1774 a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
1775
1776
1777@array_function_dispatch(_unary_op_dispatcher)
1778def upper(a):
1779 """
1780 Return an array with the elements converted to uppercase.
1781
1782 Calls `str.upper` element-wise.
1783
1784 For 8-bit strings, this method is locale-dependent.
1785
1786 Parameters
1787 ----------
1788 a : array_like, {str, unicode}
1789 Input array.
1790
1791 Returns
1792 -------
1793 out : ndarray, {str, unicode}
1794 Output array of str or unicode, depending on input type
1795
1796 See Also
1797 --------
1798 str.upper
1799
1800 Examples
1801 --------
1802 >>> c = np.array(['a1b c', '1bca', 'bca1']); c
1803 array(['a1b c', '1bca', 'bca1'], dtype='<U5')
1804 >>> np.char.upper(c)
1805 array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
1806
1807 """
1808 a_arr = numpy.asarray(a)
1809 return _vec_string(a_arr, a_arr.dtype, 'upper')
1810
1811
1812def _zfill_dispatcher(a, width):
1813 return (a,)
1814
1815
1816@array_function_dispatch(_zfill_dispatcher)
1817def zfill(a, width):
1818 """
1819 Return the numeric string left-filled with zeros
1820
1821 Calls `str.zfill` element-wise.
1822
1823 Parameters
1824 ----------
1825 a : array_like, {str, unicode}
1826 Input array.
1827 width : int
1828 Width of string to left-fill elements in `a`.
1829
1830 Returns
1831 -------
1832 out : ndarray, {str, unicode}
1833 Output array of str or unicode, depending on input type
1834
1835 See Also
1836 --------
1837 str.zfill
1838
1839 """
1840 a_arr = numpy.asarray(a)
1841 width_arr = numpy.asarray(width)
1842 size = int(numpy.max(width_arr.flat))
1843 return _vec_string(
1844 a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,))
1845
1846
1847@array_function_dispatch(_unary_op_dispatcher)
1848def isnumeric(a):
1849 """
1850 For each element, return True if there are only numeric
1851 characters in the element.
1852
1853 Calls `str.isnumeric` element-wise.
1854
1855 Numeric characters include digit characters, and all characters
1856 that have the Unicode numeric value property, e.g. ``U+2155,
1857 VULGAR FRACTION ONE FIFTH``.
1858
1859 Parameters
1860 ----------
1861 a : array_like, unicode
1862 Input array.
1863
1864 Returns
1865 -------
1866 out : ndarray, bool
1867 Array of booleans of same shape as `a`.
1868
1869 See Also
1870 --------
1871 str.isnumeric
1872
1873 Examples
1874 --------
1875 >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
1876 array([ True, False, False, False, False])
1877
1878 """
1879 if not _is_unicode(a):
1880 raise TypeError("isnumeric is only available for Unicode strings and arrays")
1881 return _vec_string(a, bool_, 'isnumeric')
1882
1883
1884@array_function_dispatch(_unary_op_dispatcher)
1885def isdecimal(a):
1886 """
1887 For each element, return True if there are only decimal
1888 characters in the element.
1889
1890 Calls `str.isdecimal` element-wise.
1891
1892 Decimal characters include digit characters, and all characters
1893 that can be used to form decimal-radix numbers,
1894 e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
1895
1896 Parameters
1897 ----------
1898 a : array_like, unicode
1899 Input array.
1900
1901 Returns
1902 -------
1903 out : ndarray, bool
1904 Array of booleans identical in shape to `a`.
1905
1906 See Also
1907 --------
1908 str.isdecimal
1909
1910 Examples
1911 --------
1912 >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
1913 array([ True, False, False, False])
1914
1915 """
1916 if not _is_unicode(a):
1917 raise TypeError(
1918 "isdecimal is only available for Unicode strings and arrays")
1919 return _vec_string(a, bool_, 'isdecimal')
1920
1921
1922@set_module('numpy')
1923class chararray(ndarray):
1924 """
1925 chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
1926 strides=None, order=None)
1927
1928 Provides a convenient view on arrays of string and unicode values.
1929
1930 .. note::
1931 The `chararray` class exists for backwards compatibility with
1932 Numarray, it is not recommended for new development. Starting from numpy
1933 1.4, if one needs arrays of strings, it is recommended to use arrays of
1934 `dtype` `object_`, `bytes_` or `str_`, and use the free functions
1935 in the `numpy.char` module for fast vectorized string operations.
1936
1937 Versus a regular NumPy array of type `str` or `unicode`, this
1938 class adds the following functionality:
1939
1940 1) values automatically have whitespace removed from the end
1941 when indexed
1942
1943 2) comparison operators automatically remove whitespace from the
1944 end when comparing values
1945
1946 3) vectorized string operations are provided as methods
1947 (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
1948
1949 chararrays should be created using `numpy.char.array` or
1950 `numpy.char.asarray`, rather than this constructor directly.
1951
1952 This constructor creates the array, using `buffer` (with `offset`
1953 and `strides`) if it is not ``None``. If `buffer` is ``None``, then
1954 constructs a new array with `strides` in "C order", unless both
1955 ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
1956 is in "Fortran order".
1957
1958 Methods
1959 -------
1960 astype
1961 argsort
1962 copy
1963 count
1964 decode
1965 dump
1966 dumps
1967 encode
1968 endswith
1969 expandtabs
1970 fill
1971 find
1972 flatten
1973 getfield
1974 index
1975 isalnum
1976 isalpha
1977 isdecimal
1978 isdigit
1979 islower
1980 isnumeric
1981 isspace
1982 istitle
1983 isupper
1984 item
1985 join
1986 ljust
1987 lower
1988 lstrip
1989 nonzero
1990 put
1991 ravel
1992 repeat
1993 replace
1994 reshape
1995 resize
1996 rfind
1997 rindex
1998 rjust
1999 rsplit
2000 rstrip
2001 searchsorted
2002 setfield
2003 setflags
2004 sort
2005 split
2006 splitlines
2007 squeeze
2008 startswith
2009 strip
2010 swapaxes
2011 swapcase
2012 take
2013 title
2014 tofile
2015 tolist
2016 tostring
2017 translate
2018 transpose
2019 upper
2020 view
2021 zfill
2022
2023 Parameters
2024 ----------
2025 shape : tuple
2026 Shape of the array.
2027 itemsize : int, optional
2028 Length of each array element, in number of characters. Default is 1.
2029 unicode : bool, optional
2030 Are the array elements of type unicode (True) or string (False).
2031 Default is False.
2032 buffer : object exposing the buffer interface or str, optional
2033 Memory address of the start of the array data. Default is None,
2034 in which case a new array is created.
2035 offset : int, optional
2036 Fixed stride displacement from the beginning of an axis?
2037 Default is 0. Needs to be >=0.
2038 strides : array_like of ints, optional
2039 Strides for the array (see `ndarray.strides` for full description).
2040 Default is None.
2041 order : {'C', 'F'}, optional
2042 The order in which the array data is stored in memory: 'C' ->
2043 "row major" order (the default), 'F' -> "column major"
2044 (Fortran) order.
2045
2046 Examples
2047 --------
2048 >>> charar = np.chararray((3, 3))
2049 >>> charar[:] = 'a'
2050 >>> charar
2051 chararray([[b'a', b'a', b'a'],
2052 [b'a', b'a', b'a'],
2053 [b'a', b'a', b'a']], dtype='|S1')
2054
2055 >>> charar = np.chararray(charar.shape, itemsize=5)
2056 >>> charar[:] = 'abc'
2057 >>> charar
2058 chararray([[b'abc', b'abc', b'abc'],
2059 [b'abc', b'abc', b'abc'],
2060 [b'abc', b'abc', b'abc']], dtype='|S5')
2061
2062 """
2063 def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
2064 offset=0, strides=None, order='C'):
2065 global _globalvar
2066
2067 if unicode:
2068 dtype = str_
2069 else:
2070 dtype = bytes_
2071
2072 # force itemsize to be a Python int, since using NumPy integer
2073 # types results in itemsize.itemsize being used as the size of
2074 # strings in the new array.
2075 itemsize = int(itemsize)
2076
2077 if isinstance(buffer, str):
2078 # unicode objects do not have the buffer interface
2079 filler = buffer
2080 buffer = None
2081 else:
2082 filler = None
2083
2084 _globalvar = 1
2085 if buffer is None:
2086 self = ndarray.__new__(subtype, shape, (dtype, itemsize),
2087 order=order)
2088 else:
2089 self = ndarray.__new__(subtype, shape, (dtype, itemsize),
2090 buffer=buffer,
2091 offset=offset, strides=strides,
2092 order=order)
2093 if filler is not None:
2094 self[...] = filler
2095 _globalvar = 0
2096 return self
2097
2098 def __array_finalize__(self, obj):
2099 # The b is a special case because it is used for reconstructing.
2100 if not _globalvar and self.dtype.char not in 'SUbc':
2101 raise ValueError("Can only create a chararray from string data.")
2102
2103 def __getitem__(self, obj):
2104 val = ndarray.__getitem__(self, obj)
2105
2106 if isinstance(val, character):
2107 temp = val.rstrip()
2108 if len(temp) == 0:
2109 val = ''
2110 else:
2111 val = temp
2112
2113 return val
2114
2115 # IMPLEMENTATION NOTE: Most of the methods of this class are
2116 # direct delegations to the free functions in this module.
2117 # However, those that return an array of strings should instead
2118 # return a chararray, so some extra wrapping is required.
2119
2120 def __eq__(self, other):
2121 """
2122 Return (self == other) element-wise.
2123
2124 See Also
2125 --------
2126 equal
2127 """
2128 return equal(self, other)
2129
2130 def __ne__(self, other):
2131 """
2132 Return (self != other) element-wise.
2133
2134 See Also
2135 --------
2136 not_equal
2137 """
2138 return not_equal(self, other)
2139
2140 def __ge__(self, other):
2141 """
2142 Return (self >= other) element-wise.
2143
2144 See Also
2145 --------
2146 greater_equal
2147 """
2148 return greater_equal(self, other)
2149
2150 def __le__(self, other):
2151 """
2152 Return (self <= other) element-wise.
2153
2154 See Also
2155 --------
2156 less_equal
2157 """
2158 return less_equal(self, other)
2159
2160 def __gt__(self, other):
2161 """
2162 Return (self > other) element-wise.
2163
2164 See Also
2165 --------
2166 greater
2167 """
2168 return greater(self, other)
2169
2170 def __lt__(self, other):
2171 """
2172 Return (self < other) element-wise.
2173
2174 See Also
2175 --------
2176 less
2177 """
2178 return less(self, other)
2179
2180 def __add__(self, other):
2181 """
2182 Return (self + other), that is string concatenation,
2183 element-wise for a pair of array_likes of str or unicode.
2184
2185 See Also
2186 --------
2187 add
2188 """
2189 return asarray(add(self, other))
2190
2191 def __radd__(self, other):
2192 """
2193 Return (other + self), that is string concatenation,
2194 element-wise for a pair of array_likes of `bytes_` or `str_`.
2195
2196 See Also
2197 --------
2198 add
2199 """
2200 return asarray(add(numpy.asarray(other), self))
2201
2202 def __mul__(self, i):
2203 """
2204 Return (self * i), that is string multiple concatenation,
2205 element-wise.
2206
2207 See Also
2208 --------
2209 multiply
2210 """
2211 return asarray(multiply(self, i))
2212
2213 def __rmul__(self, i):
2214 """
2215 Return (self * i), that is string multiple concatenation,
2216 element-wise.
2217
2218 See Also
2219 --------
2220 multiply
2221 """
2222 return asarray(multiply(self, i))
2223
2224 def __mod__(self, i):
2225 """
2226 Return (self % i), that is pre-Python 2.6 string formatting
2227 (interpolation), element-wise for a pair of array_likes of `bytes_`
2228 or `str_`.
2229
2230 See Also
2231 --------
2232 mod
2233 """
2234 return asarray(mod(self, i))
2235
2236 def __rmod__(self, other):
2237 return NotImplemented
2238
2239 def argsort(self, axis=-1, kind=None, order=None):
2240 """
2241 Return the indices that sort the array lexicographically.
2242
2243 For full documentation see `numpy.argsort`, for which this method is
2244 in fact merely a "thin wrapper."
2245
2246 Examples
2247 --------
2248 >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
2249 >>> c = c.view(np.chararray); c
2250 chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
2251 dtype='|S5')
2252 >>> c[c.argsort()]
2253 chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
2254 dtype='|S5')
2255
2256 """
2257 return self.__array__().argsort(axis, kind, order)
2258 argsort.__doc__ = ndarray.argsort.__doc__
2259
2260 def capitalize(self):
2261 """
2262 Return a copy of `self` with only the first character of each element
2263 capitalized.
2264
2265 See Also
2266 --------
2267 char.capitalize
2268
2269 """
2270 return asarray(capitalize(self))
2271
2272 def center(self, width, fillchar=' '):
2273 """
2274 Return a copy of `self` with its elements centered in a
2275 string of length `width`.
2276
2277 See Also
2278 --------
2279 center
2280 """
2281 return asarray(center(self, width, fillchar))
2282
2283 def count(self, sub, start=0, end=None):
2284 """
2285 Returns an array with the number of non-overlapping occurrences of
2286 substring `sub` in the range [`start`, `end`].
2287
2288 See Also
2289 --------
2290 char.count
2291
2292 """
2293 return count(self, sub, start, end)
2294
2295 def decode(self, encoding=None, errors=None):
2296 """
2297 Calls ``bytes.decode`` element-wise.
2298
2299 See Also
2300 --------
2301 char.decode
2302
2303 """
2304 return decode(self, encoding, errors)
2305
2306 def encode(self, encoding=None, errors=None):
2307 """
2308 Calls `str.encode` element-wise.
2309
2310 See Also
2311 --------
2312 char.encode
2313
2314 """
2315 return encode(self, encoding, errors)
2316
2317 def endswith(self, suffix, start=0, end=None):
2318 """
2319 Returns a boolean array which is `True` where the string element
2320 in `self` ends with `suffix`, otherwise `False`.
2321
2322 See Also
2323 --------
2324 char.endswith
2325
2326 """
2327 return endswith(self, suffix, start, end)
2328
2329 def expandtabs(self, tabsize=8):
2330 """
2331 Return a copy of each string element where all tab characters are
2332 replaced by one or more spaces.
2333
2334 See Also
2335 --------
2336 char.expandtabs
2337
2338 """
2339 return asarray(expandtabs(self, tabsize))
2340
2341 def find(self, sub, start=0, end=None):
2342 """
2343 For each element, return the lowest index in the string where
2344 substring `sub` is found.
2345
2346 See Also
2347 --------
2348 char.find
2349
2350 """
2351 return find(self, sub, start, end)
2352
2353 def index(self, sub, start=0, end=None):
2354 """
2355 Like `find`, but raises `ValueError` when the substring is not found.
2356
2357 See Also
2358 --------
2359 char.index
2360
2361 """
2362 return index(self, sub, start, end)
2363
2364 def isalnum(self):
2365 """
2366 Returns true for each element if all characters in the string
2367 are alphanumeric and there is at least one character, false
2368 otherwise.
2369
2370 See Also
2371 --------
2372 char.isalnum
2373
2374 """
2375 return isalnum(self)
2376
2377 def isalpha(self):
2378 """
2379 Returns true for each element if all characters in the string
2380 are alphabetic and there is at least one character, false
2381 otherwise.
2382
2383 See Also
2384 --------
2385 char.isalpha
2386
2387 """
2388 return isalpha(self)
2389
2390 def isdigit(self):
2391 """
2392 Returns true for each element if all characters in the string are
2393 digits and there is at least one character, false otherwise.
2394
2395 See Also
2396 --------
2397 char.isdigit
2398
2399 """
2400 return isdigit(self)
2401
2402 def islower(self):
2403 """
2404 Returns true for each element if all cased characters in the
2405 string are lowercase and there is at least one cased character,
2406 false otherwise.
2407
2408 See Also
2409 --------
2410 char.islower
2411
2412 """
2413 return islower(self)
2414
2415 def isspace(self):
2416 """
2417 Returns true for each element if there are only whitespace
2418 characters in the string and there is at least one character,
2419 false otherwise.
2420
2421 See Also
2422 --------
2423 char.isspace
2424
2425 """
2426 return isspace(self)
2427
2428 def istitle(self):
2429 """
2430 Returns true for each element if the element is a titlecased
2431 string and there is at least one character, false otherwise.
2432
2433 See Also
2434 --------
2435 char.istitle
2436
2437 """
2438 return istitle(self)
2439
2440 def isupper(self):
2441 """
2442 Returns true for each element if all cased characters in the
2443 string are uppercase and there is at least one character, false
2444 otherwise.
2445
2446 See Also
2447 --------
2448 char.isupper
2449
2450 """
2451 return isupper(self)
2452
2453 def join(self, seq):
2454 """
2455 Return a string which is the concatenation of the strings in the
2456 sequence `seq`.
2457
2458 See Also
2459 --------
2460 char.join
2461
2462 """
2463 return join(self, seq)
2464
2465 def ljust(self, width, fillchar=' '):
2466 """
2467 Return an array with the elements of `self` left-justified in a
2468 string of length `width`.
2469
2470 See Also
2471 --------
2472 char.ljust
2473
2474 """
2475 return asarray(ljust(self, width, fillchar))
2476
2477 def lower(self):
2478 """
2479 Return an array with the elements of `self` converted to
2480 lowercase.
2481
2482 See Also
2483 --------
2484 char.lower
2485
2486 """
2487 return asarray(lower(self))
2488
2489 def lstrip(self, chars=None):
2490 """
2491 For each element in `self`, return a copy with the leading characters
2492 removed.
2493
2494 See Also
2495 --------
2496 char.lstrip
2497
2498 """
2499 return asarray(lstrip(self, chars))
2500
2501 def partition(self, sep):
2502 """
2503 Partition each element in `self` around `sep`.
2504
2505 See Also
2506 --------
2507 partition
2508 """
2509 return asarray(partition(self, sep))
2510
2511 def replace(self, old, new, count=None):
2512 """
2513 For each element in `self`, return a copy of the string with all
2514 occurrences of substring `old` replaced by `new`.
2515
2516 See Also
2517 --------
2518 char.replace
2519
2520 """
2521 return asarray(replace(self, old, new, count))
2522
2523 def rfind(self, sub, start=0, end=None):
2524 """
2525 For each element in `self`, return the highest index in the string
2526 where substring `sub` is found, such that `sub` is contained
2527 within [`start`, `end`].
2528
2529 See Also
2530 --------
2531 char.rfind
2532
2533 """
2534 return rfind(self, sub, start, end)
2535
2536 def rindex(self, sub, start=0, end=None):
2537 """
2538 Like `rfind`, but raises `ValueError` when the substring `sub` is
2539 not found.
2540
2541 See Also
2542 --------
2543 char.rindex
2544
2545 """
2546 return rindex(self, sub, start, end)
2547
2548 def rjust(self, width, fillchar=' '):
2549 """
2550 Return an array with the elements of `self`
2551 right-justified in a string of length `width`.
2552
2553 See Also
2554 --------
2555 char.rjust
2556
2557 """
2558 return asarray(rjust(self, width, fillchar))
2559
2560 def rpartition(self, sep):
2561 """
2562 Partition each element in `self` around `sep`.
2563
2564 See Also
2565 --------
2566 rpartition
2567 """
2568 return asarray(rpartition(self, sep))
2569
2570 def rsplit(self, sep=None, maxsplit=None):
2571 """
2572 For each element in `self`, return a list of the words in
2573 the string, using `sep` as the delimiter string.
2574
2575 See Also
2576 --------
2577 char.rsplit
2578
2579 """
2580 return rsplit(self, sep, maxsplit)
2581
2582 def rstrip(self, chars=None):
2583 """
2584 For each element in `self`, return a copy with the trailing
2585 characters removed.
2586
2587 See Also
2588 --------
2589 char.rstrip
2590
2591 """
2592 return asarray(rstrip(self, chars))
2593
2594 def split(self, sep=None, maxsplit=None):
2595 """
2596 For each element in `self`, return a list of the words in the
2597 string, using `sep` as the delimiter string.
2598
2599 See Also
2600 --------
2601 char.split
2602
2603 """
2604 return split(self, sep, maxsplit)
2605
2606 def splitlines(self, keepends=None):
2607 """
2608 For each element in `self`, return a list of the lines in the
2609 element, breaking at line boundaries.
2610
2611 See Also
2612 --------
2613 char.splitlines
2614
2615 """
2616 return splitlines(self, keepends)
2617
2618 def startswith(self, prefix, start=0, end=None):
2619 """
2620 Returns a boolean array which is `True` where the string element
2621 in `self` starts with `prefix`, otherwise `False`.
2622
2623 See Also
2624 --------
2625 char.startswith
2626
2627 """
2628 return startswith(self, prefix, start, end)
2629
2630 def strip(self, chars=None):
2631 """
2632 For each element in `self`, return a copy with the leading and
2633 trailing characters removed.
2634
2635 See Also
2636 --------
2637 char.strip
2638
2639 """
2640 return asarray(strip(self, chars))
2641
2642 def swapcase(self):
2643 """
2644 For each element in `self`, return a copy of the string with
2645 uppercase characters converted to lowercase and vice versa.
2646
2647 See Also
2648 --------
2649 char.swapcase
2650
2651 """
2652 return asarray(swapcase(self))
2653
2654 def title(self):
2655 """
2656 For each element in `self`, return a titlecased version of the
2657 string: words start with uppercase characters, all remaining cased
2658 characters are lowercase.
2659
2660 See Also
2661 --------
2662 char.title
2663
2664 """
2665 return asarray(title(self))
2666
2667 def translate(self, table, deletechars=None):
2668 """
2669 For each element in `self`, return a copy of the string where
2670 all characters occurring in the optional argument
2671 `deletechars` are removed, and the remaining characters have
2672 been mapped through the given translation table.
2673
2674 See Also
2675 --------
2676 char.translate
2677
2678 """
2679 return asarray(translate(self, table, deletechars))
2680
2681 def upper(self):
2682 """
2683 Return an array with the elements of `self` converted to
2684 uppercase.
2685
2686 See Also
2687 --------
2688 char.upper
2689
2690 """
2691 return asarray(upper(self))
2692
2693 def zfill(self, width):
2694 """
2695 Return the numeric string left-filled with zeros in a string of
2696 length `width`.
2697
2698 See Also
2699 --------
2700 char.zfill
2701
2702 """
2703 return asarray(zfill(self, width))
2704
2705 def isnumeric(self):
2706 """
2707 For each element in `self`, return True if there are only
2708 numeric characters in the element.
2709
2710 See Also
2711 --------
2712 char.isnumeric
2713
2714 """
2715 return isnumeric(self)
2716
2717 def isdecimal(self):
2718 """
2719 For each element in `self`, return True if there are only
2720 decimal characters in the element.
2721
2722 See Also
2723 --------
2724 char.isdecimal
2725
2726 """
2727 return isdecimal(self)
2728
2729
2730@set_module("numpy.char")
2731def array(obj, itemsize=None, copy=True, unicode=None, order=None):
2732 """
2733 Create a `chararray`.
2734
2735 .. note::
2736 This class is provided for numarray backward-compatibility.
2737 New code (not concerned with numarray compatibility) should use
2738 arrays of type `bytes_` or `str_` and use the free functions
2739 in :mod:`numpy.char <numpy.core.defchararray>` for fast
2740 vectorized string operations instead.
2741
2742 Versus a regular NumPy array of type `str` or `unicode`, this
2743 class adds the following functionality:
2744
2745 1) values automatically have whitespace removed from the end
2746 when indexed
2747
2748 2) comparison operators automatically remove whitespace from the
2749 end when comparing values
2750
2751 3) vectorized string operations are provided as methods
2752 (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)
2753
2754 Parameters
2755 ----------
2756 obj : array of str or unicode-like
2757
2758 itemsize : int, optional
2759 `itemsize` is the number of characters per scalar in the
2760 resulting array. If `itemsize` is None, and `obj` is an
2761 object array or a Python list, the `itemsize` will be
2762 automatically determined. If `itemsize` is provided and `obj`
2763 is of type str or unicode, then the `obj` string will be
2764 chunked into `itemsize` pieces.
2765
2766 copy : bool, optional
2767 If true (default), then the object is copied. Otherwise, a copy
2768 will only be made if __array__ returns a copy, if obj is a
2769 nested sequence, or if a copy is needed to satisfy any of the other
2770 requirements (`itemsize`, unicode, `order`, etc.).
2771
2772 unicode : bool, optional
2773 When true, the resulting `chararray` can contain Unicode
2774 characters, when false only 8-bit characters. If unicode is
2775 None and `obj` is one of the following:
2776
2777 - a `chararray`,
2778 - an ndarray of type `str` or `unicode`
2779 - a Python str or unicode object,
2780
2781 then the unicode setting of the output array will be
2782 automatically determined.
2783
2784 order : {'C', 'F', 'A'}, optional
2785 Specify the order of the array. If order is 'C' (default), then the
2786 array will be in C-contiguous order (last-index varies the
2787 fastest). If order is 'F', then the returned array
2788 will be in Fortran-contiguous order (first-index varies the
2789 fastest). If order is 'A', then the returned array may
2790 be in any order (either C-, Fortran-contiguous, or even
2791 discontiguous).
2792 """
2793 if isinstance(obj, (bytes, str)):
2794 if unicode is None:
2795 if isinstance(obj, str):
2796 unicode = True
2797 else:
2798 unicode = False
2799
2800 if itemsize is None:
2801 itemsize = len(obj)
2802 shape = len(obj) // itemsize
2803
2804 return chararray(shape, itemsize=itemsize, unicode=unicode,
2805 buffer=obj, order=order)
2806
2807 if isinstance(obj, (list, tuple)):
2808 obj = numpy.asarray(obj)
2809
2810 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
2811 # If we just have a vanilla chararray, create a chararray
2812 # view around it.
2813 if not isinstance(obj, chararray):
2814 obj = obj.view(chararray)
2815
2816 if itemsize is None:
2817 itemsize = obj.itemsize
2818 # itemsize is in 8-bit chars, so for Unicode, we need
2819 # to divide by the size of a single Unicode character,
2820 # which for NumPy is always 4
2821 if issubclass(obj.dtype.type, str_):
2822 itemsize //= 4
2823
2824 if unicode is None:
2825 if issubclass(obj.dtype.type, str_):
2826 unicode = True
2827 else:
2828 unicode = False
2829
2830 if unicode:
2831 dtype = str_
2832 else:
2833 dtype = bytes_
2834
2835 if order is not None:
2836 obj = numpy.asarray(obj, order=order)
2837 if (copy or
2838 (itemsize != obj.itemsize) or
2839 (not unicode and isinstance(obj, str_)) or
2840 (unicode and isinstance(obj, bytes_))):
2841 obj = obj.astype((dtype, int(itemsize)))
2842 return obj
2843
2844 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
2845 if itemsize is None:
2846 # Since no itemsize was specified, convert the input array to
2847 # a list so the ndarray constructor will automatically
2848 # determine the itemsize for us.
2849 obj = obj.tolist()
2850 # Fall through to the default case
2851
2852 if unicode:
2853 dtype = str_
2854 else:
2855 dtype = bytes_
2856
2857 if itemsize is None:
2858 val = narray(obj, dtype=dtype, order=order, subok=True)
2859 else:
2860 val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
2861 return val.view(chararray)
2862
2863
2864@set_module("numpy.char")
2865def asarray(obj, itemsize=None, unicode=None, order=None):
2866 """
2867 Convert the input to a `chararray`, copying the data only if
2868 necessary.
2869
2870 Versus a regular NumPy array of type `str` or `unicode`, this
2871 class adds the following functionality:
2872
2873 1) values automatically have whitespace removed from the end
2874 when indexed
2875
2876 2) comparison operators automatically remove whitespace from the
2877 end when comparing values
2878
2879 3) vectorized string operations are provided as methods
2880 (e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,``%``)
2881
2882 Parameters
2883 ----------
2884 obj : array of str or unicode-like
2885
2886 itemsize : int, optional
2887 `itemsize` is the number of characters per scalar in the
2888 resulting array. If `itemsize` is None, and `obj` is an
2889 object array or a Python list, the `itemsize` will be
2890 automatically determined. If `itemsize` is provided and `obj`
2891 is of type str or unicode, then the `obj` string will be
2892 chunked into `itemsize` pieces.
2893
2894 unicode : bool, optional
2895 When true, the resulting `chararray` can contain Unicode
2896 characters, when false only 8-bit characters. If unicode is
2897 None and `obj` is one of the following:
2898
2899 - a `chararray`,
2900 - an ndarray of type `str` or 'unicode`
2901 - a Python str or unicode object,
2902
2903 then the unicode setting of the output array will be
2904 automatically determined.
2905
2906 order : {'C', 'F'}, optional
2907 Specify the order of the array. If order is 'C' (default), then the
2908 array will be in C-contiguous order (last-index varies the
2909 fastest). If order is 'F', then the returned array
2910 will be in Fortran-contiguous order (first-index varies the
2911 fastest).
2912 """
2913 return array(obj, itemsize, copy=False,
2914 unicode=unicode, order=order)