1"""
2This module contains a set of functions for vectorized string
3operations and methods.
4
5.. note::
6 The `chararray` class exists for backwards compatibility with
7 Numarray, it is not recommended for new development. Starting from numpy
8 1.4, if one needs arrays of strings, it is recommended to use arrays of
9 `dtype` `object_`, `string_` or `unicode_`, and use the free functions
10 in the `numpy.char` module for fast vectorized string operations.
11
12Some methods will only be available if the corresponding string method is
13available in your version of Python.
14
15The preferred alias for `defchararray` is `numpy.char`.
16
17"""
18import functools
19from .numerictypes import (
20 string_, unicode_, integer, int_, object_, bool_, character)
21from .numeric import ndarray, compare_chararrays
22from .numeric import array as narray
23from numpy.core.multiarray import _vec_string
24from numpy.core.overrides import set_module
25from numpy.core import overrides
26from numpy.compat import asbytes
27import numpy
28
29__all__ = [
30 'equal', 'not_equal', 'greater_equal', 'less_equal',
31 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
32 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
33 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
34 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
35 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
36 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
37 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
38 'array', 'asarray'
39 ]
40
41
42_globalvar = 0
43
44array_function_dispatch = functools.partial(
45 overrides.array_function_dispatch, module='numpy.char')
46
47
48def _use_unicode(*args):
49 """
50 Helper function for determining the output type of some string
51 operations.
52
53 For an operation on two ndarrays, if at least one is unicode, the
54 result should be unicode.
55 """
56 for x in args:
57 if (isinstance(x, str) or
58 issubclass(numpy.asarray(x).dtype.type, unicode_)):
59 return unicode_
60 return string_
61
62def _to_string_or_unicode_array(result):
63 """
64 Helper function to cast a result back into a string or unicode array
65 if an object array must be used as an intermediary.
66 """
67 return numpy.asarray(result.tolist())
68
69def _clean_args(*args):
70 """
71 Helper function for delegating arguments to Python string
72 functions.
73
74 Many of the Python string operations that have optional arguments
75 do not use 'None' to indicate a default value. In these cases,
76 we need to remove all None arguments, and those following them.
77 """
78 newargs = []
79 for chk in args:
80 if chk is None:
81 break
82 newargs.append(chk)
83 return newargs
84
85def _get_num_chars(a):
86 """
87 Helper function that returns the number of characters per field in
88 a string or unicode array. This is to abstract out the fact that
89 for a unicode array this is itemsize / 4.
90 """
91 if issubclass(a.dtype.type, unicode_):
92 return a.itemsize // 4
93 return a.itemsize
94
95
96def _binary_op_dispatcher(x1, x2):
97 return (x1, x2)
98
99
100@array_function_dispatch(_binary_op_dispatcher)
101def equal(x1, x2):
102 """
103 Return (x1 == x2) element-wise.
104
105 Unlike `numpy.equal`, this comparison is performed by first
106 stripping whitespace characters from the end of the string. This
107 behavior is provided for backward-compatibility with numarray.
108
109 Parameters
110 ----------
111 x1, x2 : array_like of str or unicode
112 Input arrays of the same shape.
113
114 Returns
115 -------
116 out : ndarray
117 Output array of bools.
118
119 See Also
120 --------
121 not_equal, greater_equal, less_equal, greater, less
122 """
123 return compare_chararrays(x1, x2, '==', True)
124
125
126@array_function_dispatch(_binary_op_dispatcher)
127def not_equal(x1, x2):
128 """
129 Return (x1 != x2) element-wise.
130
131 Unlike `numpy.not_equal`, this comparison is performed by first
132 stripping whitespace characters from the end of the string. This
133 behavior is provided for backward-compatibility with numarray.
134
135 Parameters
136 ----------
137 x1, x2 : array_like of str or unicode
138 Input arrays of the same shape.
139
140 Returns
141 -------
142 out : ndarray
143 Output array of bools.
144
145 See Also
146 --------
147 equal, greater_equal, less_equal, greater, less
148 """
149 return compare_chararrays(x1, x2, '!=', True)
150
151
152@array_function_dispatch(_binary_op_dispatcher)
153def greater_equal(x1, x2):
154 """
155 Return (x1 >= x2) element-wise.
156
157 Unlike `numpy.greater_equal`, this comparison is performed by
158 first stripping whitespace characters from the end of the string.
159 This behavior is provided for backward-compatibility with
160 numarray.
161
162 Parameters
163 ----------
164 x1, x2 : array_like of str or unicode
165 Input arrays of the same shape.
166
167 Returns
168 -------
169 out : ndarray
170 Output array of bools.
171
172 See Also
173 --------
174 equal, not_equal, less_equal, greater, less
175 """
176 return compare_chararrays(x1, x2, '>=', True)
177
178
179@array_function_dispatch(_binary_op_dispatcher)
180def less_equal(x1, x2):
181 """
182 Return (x1 <= x2) element-wise.
183
184 Unlike `numpy.less_equal`, this comparison is performed by first
185 stripping whitespace characters from the end of the string. This
186 behavior is provided for backward-compatibility with numarray.
187
188 Parameters
189 ----------
190 x1, x2 : array_like of str or unicode
191 Input arrays of the same shape.
192
193 Returns
194 -------
195 out : ndarray
196 Output array of bools.
197
198 See Also
199 --------
200 equal, not_equal, greater_equal, greater, less
201 """
202 return compare_chararrays(x1, x2, '<=', True)
203
204
205@array_function_dispatch(_binary_op_dispatcher)
206def greater(x1, x2):
207 """
208 Return (x1 > x2) element-wise.
209
210 Unlike `numpy.greater`, this comparison is performed by first
211 stripping whitespace characters from the end of the string. This
212 behavior is provided for backward-compatibility with numarray.
213
214 Parameters
215 ----------
216 x1, x2 : array_like of str or unicode
217 Input arrays of the same shape.
218
219 Returns
220 -------
221 out : ndarray
222 Output array of bools.
223
224 See Also
225 --------
226 equal, not_equal, greater_equal, less_equal, less
227 """
228 return compare_chararrays(x1, x2, '>', True)
229
230
231@array_function_dispatch(_binary_op_dispatcher)
232def less(x1, x2):
233 """
234 Return (x1 < x2) element-wise.
235
236 Unlike `numpy.greater`, this comparison is performed by first
237 stripping whitespace characters from the end of the string. This
238 behavior is provided for backward-compatibility with numarray.
239
240 Parameters
241 ----------
242 x1, x2 : array_like of str or unicode
243 Input arrays of the same shape.
244
245 Returns
246 -------
247 out : ndarray
248 Output array of bools.
249
250 See Also
251 --------
252 equal, not_equal, greater_equal, less_equal, greater
253 """
254 return compare_chararrays(x1, x2, '<', True)
255
256
257def _unary_op_dispatcher(a):
258 return (a,)
259
260
261@array_function_dispatch(_unary_op_dispatcher)
262def str_len(a):
263 """
264 Return len(a) element-wise.
265
266 Parameters
267 ----------
268 a : array_like of str or unicode
269
270 Returns
271 -------
272 out : ndarray
273 Output array of integers
274
275 See Also
276 --------
277 builtins.len
278
279 Examples
280 --------
281 >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
282 >>> np.char.str_len(a)
283 array([23, 15])
284 >>> a = np.array([u'\u0420', u'\u043e'])
285 >>> np.char.str_len(a)
286 array([1, 1])
287 >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
288 >>> np.char.str_len(a)
289 array([[5, 5], [1, 1]])
290 """
291 # Note: __len__, etc. currently return ints, which are not C-integers.
292 # Generally intp would be expected for lengths, although int is sufficient
293 # due to the dtype itemsize limitation.
294 return _vec_string(a, int_, '__len__')
295
296
297@array_function_dispatch(_binary_op_dispatcher)
298def add(x1, x2):
299 """
300 Return element-wise string concatenation for two arrays of str or unicode.
301
302 Arrays `x1` and `x2` must have the same shape.
303
304 Parameters
305 ----------
306 x1 : array_like of str or unicode
307 Input array.
308 x2 : array_like of str or unicode
309 Input array.
310
311 Returns
312 -------
313 add : ndarray
314 Output array of `string_` or `unicode_`, depending on input types
315 of the same shape as `x1` and `x2`.
316
317 """
318 arr1 = numpy.asarray(x1)
319 arr2 = numpy.asarray(x2)
320 out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
321 dtype = _use_unicode(arr1, arr2)
322 return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
323
324
325def _multiply_dispatcher(a, i):
326 return (a,)
327
328
329@array_function_dispatch(_multiply_dispatcher)
330def multiply(a, i):
331 """
332 Return (a * i), that is string multiple concatenation,
333 element-wise.
334
335 Values in `i` of less than 0 are treated as 0 (which yields an
336 empty string).
337
338 Parameters
339 ----------
340 a : array_like of str or unicode
341
342 i : array_like of ints
343
344 Returns
345 -------
346 out : ndarray
347 Output array of str or unicode, depending on input types
348
349 Examples
350 --------
351 >>> a = np.array(["a", "b", "c"])
352 >>> np.char.multiply(x, 3)
353 array(['aaa', 'bbb', 'ccc'], dtype='<U3')
354 >>> i = np.array([1, 2, 3])
355 >>> np.char.multiply(a, i)
356 array(['a', 'bb', 'ccc'], dtype='<U3')
357 >>> np.char.multiply(np.array(['a']), i)
358 array(['a', 'aa', 'aaa'], dtype='<U3')
359 >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
360 >>> np.char.multiply(a, 3)
361 array([['aaa', 'bbb', 'ccc'],
362 ['ddd', 'eee', 'fff']], dtype='<U3')
363 >>> np.char.multiply(a, i)
364 array([['a', 'bb', 'ccc'],
365 ['d', 'ee', 'fff']], dtype='<U3')
366 """
367 a_arr = numpy.asarray(a)
368 i_arr = numpy.asarray(i)
369 if not issubclass(i_arr.dtype.type, integer):
370 raise ValueError("Can only multiply by integers")
371 out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
372 return _vec_string(
373 a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
374
375
376def _mod_dispatcher(a, values):
377 return (a, values)
378
379
380@array_function_dispatch(_mod_dispatcher)
381def mod(a, values):
382 """
383 Return (a % i), that is pre-Python 2.6 string formatting
384 (interpolation), element-wise for a pair of array_likes of str
385 or unicode.
386
387 Parameters
388 ----------
389 a : array_like of str or unicode
390
391 values : array_like of values
392 These values will be element-wise interpolated into the string.
393
394 Returns
395 -------
396 out : ndarray
397 Output array of str or unicode, depending on input types
398
399 See Also
400 --------
401 str.__mod__
402
403 """
404 return _to_string_or_unicode_array(
405 _vec_string(a, object_, '__mod__', (values,)))
406
407
408@array_function_dispatch(_unary_op_dispatcher)
409def capitalize(a):
410 """
411 Return a copy of `a` with only the first character of each element
412 capitalized.
413
414 Calls `str.capitalize` element-wise.
415
416 For 8-bit strings, this method is locale-dependent.
417
418 Parameters
419 ----------
420 a : array_like of str or unicode
421 Input array of strings to capitalize.
422
423 Returns
424 -------
425 out : ndarray
426 Output array of str or unicode, depending on input
427 types
428
429 See Also
430 --------
431 str.capitalize
432
433 Examples
434 --------
435 >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
436 array(['a1b2', '1b2a', 'b2a1', '2a1b'],
437 dtype='|S4')
438 >>> np.char.capitalize(c)
439 array(['A1b2', '1b2a', 'B2a1', '2a1b'],
440 dtype='|S4')
441
442 """
443 a_arr = numpy.asarray(a)
444 return _vec_string(a_arr, a_arr.dtype, 'capitalize')
445
446
447def _center_dispatcher(a, width, fillchar=None):
448 return (a,)
449
450
451@array_function_dispatch(_center_dispatcher)
452def center(a, width, fillchar=' '):
453 """
454 Return a copy of `a` with its elements centered in a string of
455 length `width`.
456
457 Calls `str.center` element-wise.
458
459 Parameters
460 ----------
461 a : array_like of str or unicode
462
463 width : int
464 The length of the resulting strings
465 fillchar : str or unicode, optional
466 The padding character to use (default is space).
467
468 Returns
469 -------
470 out : ndarray
471 Output array of str or unicode, depending on input
472 types
473
474 See Also
475 --------
476 str.center
477
478 Notes
479 -----
480 This function is intended to work with arrays of strings. The
481 fill character is not applied to numeric types.
482
483 Examples
484 --------
485 >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
486 array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
487 >>> np.char.center(c, width=9)
488 array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9')
489 >>> np.char.center(c, width=9, fillchar='*')
490 array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
491 >>> np.char.center(c, width=1)
492 array(['a', '1', 'b', '2'], dtype='<U1')
493
494 """
495 a_arr = numpy.asarray(a)
496 width_arr = numpy.asarray(width)
497 size = int(numpy.max(width_arr.flat))
498 if numpy.issubdtype(a_arr.dtype, numpy.string_):
499 fillchar = asbytes(fillchar)
500 return _vec_string(
501 a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
502
503
504def _count_dispatcher(a, sub, start=None, end=None):
505 return (a,)
506
507
508@array_function_dispatch(_count_dispatcher)
509def count(a, sub, start=0, end=None):
510 """
511 Returns an array with the number of non-overlapping occurrences of
512 substring `sub` in the range [`start`, `end`].
513
514 Calls `str.count` element-wise.
515
516 Parameters
517 ----------
518 a : array_like of str or unicode
519
520 sub : str or unicode
521 The substring to search for.
522
523 start, end : int, optional
524 Optional arguments `start` and `end` are interpreted as slice
525 notation to specify the range in which to count.
526
527 Returns
528 -------
529 out : ndarray
530 Output array of ints.
531
532 See Also
533 --------
534 str.count
535
536 Examples
537 --------
538 >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
539 >>> c
540 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
541 >>> np.char.count(c, 'A')
542 array([3, 1, 1])
543 >>> np.char.count(c, 'aA')
544 array([3, 1, 0])
545 >>> np.char.count(c, 'A', start=1, end=4)
546 array([2, 1, 1])
547 >>> np.char.count(c, 'A', start=1, end=3)
548 array([1, 0, 0])
549
550 """
551 return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
552
553
554def _code_dispatcher(a, encoding=None, errors=None):
555 return (a,)
556
557
558@array_function_dispatch(_code_dispatcher)
559def decode(a, encoding=None, errors=None):
560 r"""
561 Calls ``bytes.decode`` element-wise.
562
563 The set of available codecs comes from the Python standard library,
564 and may be extended at runtime. For more information, see the
565 :mod:`codecs` module.
566
567 Parameters
568 ----------
569 a : array_like of str or unicode
570
571 encoding : str, optional
572 The name of an encoding
573
574 errors : str, optional
575 Specifies how to handle encoding errors
576
577 Returns
578 -------
579 out : ndarray
580
581 See Also
582 --------
583 :py:meth:`bytes.decode`
584
585 Notes
586 -----
587 The type of the result will depend on the encoding specified.
588
589 Examples
590 --------
591 >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
592 ... b'\x81\x82\xc2\xc1\xc2\x82\x81'])
593 >>> c
594 array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
595 ... b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
596 >>> np.char.decode(c, encoding='cp037')
597 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
598
599 """
600 return _to_string_or_unicode_array(
601 _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
602
603
604@array_function_dispatch(_code_dispatcher)
605def encode(a, encoding=None, errors=None):
606 """
607 Calls `str.encode` element-wise.
608
609 The set of available codecs comes from the Python standard library,
610 and may be extended at runtime. For more information, see the codecs
611 module.
612
613 Parameters
614 ----------
615 a : array_like of str or unicode
616
617 encoding : str, optional
618 The name of an encoding
619
620 errors : str, optional
621 Specifies how to handle encoding errors
622
623 Returns
624 -------
625 out : ndarray
626
627 See Also
628 --------
629 str.encode
630
631 Notes
632 -----
633 The type of the result will depend on the encoding specified.
634
635 """
636 return _to_string_or_unicode_array(
637 _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
638
639
640def _endswith_dispatcher(a, suffix, start=None, end=None):
641 return (a,)
642
643
644@array_function_dispatch(_endswith_dispatcher)
645def endswith(a, suffix, start=0, end=None):
646 """
647 Returns a boolean array which is `True` where the string element
648 in `a` ends with `suffix`, otherwise `False`.
649
650 Calls `str.endswith` element-wise.
651
652 Parameters
653 ----------
654 a : array_like of str or unicode
655
656 suffix : str
657
658 start, end : int, optional
659 With optional `start`, test beginning at that position. With
660 optional `end`, stop comparing at that position.
661
662 Returns
663 -------
664 out : ndarray
665 Outputs an array of bools.
666
667 See Also
668 --------
669 str.endswith
670
671 Examples
672 --------
673 >>> s = np.array(['foo', 'bar'])
674 >>> s[0] = 'foo'
675 >>> s[1] = 'bar'
676 >>> s
677 array(['foo', 'bar'], dtype='<U3')
678 >>> np.char.endswith(s, 'ar')
679 array([False, True])
680 >>> np.char.endswith(s, 'a', start=1, end=2)
681 array([False, True])
682
683 """
684 return _vec_string(
685 a, bool_, 'endswith', [suffix, start] + _clean_args(end))
686
687
688def _expandtabs_dispatcher(a, tabsize=None):
689 return (a,)
690
691
692@array_function_dispatch(_expandtabs_dispatcher)
693def expandtabs(a, tabsize=8):
694 """
695 Return a copy of each string element where all tab characters are
696 replaced by one or more spaces.
697
698 Calls `str.expandtabs` element-wise.
699
700 Return a copy of each string element where all tab characters are
701 replaced by one or more spaces, depending on the current column
702 and the given `tabsize`. The column number is reset to zero after
703 each newline occurring in the string. This doesn't understand other
704 non-printing characters or escape sequences.
705
706 Parameters
707 ----------
708 a : array_like of str or unicode
709 Input array
710 tabsize : int, optional
711 Replace tabs with `tabsize` number of spaces. If not given defaults
712 to 8 spaces.
713
714 Returns
715 -------
716 out : ndarray
717 Output array of str or unicode, depending on input type
718
719 See Also
720 --------
721 str.expandtabs
722
723 """
724 return _to_string_or_unicode_array(
725 _vec_string(a, object_, 'expandtabs', (tabsize,)))
726
727
728@array_function_dispatch(_count_dispatcher)
729def find(a, sub, start=0, end=None):
730 """
731 For each element, return the lowest index in the string where
732 substring `sub` is found.
733
734 Calls `str.find` element-wise.
735
736 For each element, return the lowest index in the string where
737 substring `sub` is found, such that `sub` is contained in the
738 range [`start`, `end`].
739
740 Parameters
741 ----------
742 a : array_like of str or unicode
743
744 sub : str or unicode
745
746 start, end : int, optional
747 Optional arguments `start` and `end` are interpreted as in
748 slice notation.
749
750 Returns
751 -------
752 out : ndarray or int
753 Output array of ints. Returns -1 if `sub` is not found.
754
755 See Also
756 --------
757 str.find
758
759 Examples
760 --------
761 >>> a = np.array(["NumPy is a Python library"])
762 >>> np.char.find(a, "Python", start=0, end=None)
763 array([11])
764
765 """
766 return _vec_string(
767 a, int_, 'find', [sub, start] + _clean_args(end))
768
769
770@array_function_dispatch(_count_dispatcher)
771def index(a, sub, start=0, end=None):
772 """
773 Like `find`, but raises `ValueError` when the substring is not found.
774
775 Calls `str.index` element-wise.
776
777 Parameters
778 ----------
779 a : array_like of str or unicode
780
781 sub : str or unicode
782
783 start, end : int, optional
784
785 Returns
786 -------
787 out : ndarray
788 Output array of ints. Returns -1 if `sub` is not found.
789
790 See Also
791 --------
792 find, str.find
793
794 Examples
795 --------
796 >>> a = np.array(["Computer Science"])
797 >>> np.char.index(a, "Science", start=0, end=None)
798 array([9])
799
800 """
801 return _vec_string(
802 a, int_, 'index', [sub, start] + _clean_args(end))
803
804
805@array_function_dispatch(_unary_op_dispatcher)
806def isalnum(a):
807 """
808 Returns true for each element if all characters in the string are
809 alphanumeric and there is at least one character, false otherwise.
810
811 Calls `str.isalnum` element-wise.
812
813 For 8-bit strings, this method is locale-dependent.
814
815 Parameters
816 ----------
817 a : array_like of str or unicode
818
819 Returns
820 -------
821 out : ndarray
822 Output array of str or unicode, depending on input type
823
824 See Also
825 --------
826 str.isalnum
827 """
828 return _vec_string(a, bool_, 'isalnum')
829
830
831@array_function_dispatch(_unary_op_dispatcher)
832def isalpha(a):
833 """
834 Returns true for each element if all characters in the string are
835 alphabetic and there is at least one character, false otherwise.
836
837 Calls `str.isalpha` element-wise.
838
839 For 8-bit strings, this method is locale-dependent.
840
841 Parameters
842 ----------
843 a : array_like of str or unicode
844
845 Returns
846 -------
847 out : ndarray
848 Output array of bools
849
850 See Also
851 --------
852 str.isalpha
853 """
854 return _vec_string(a, bool_, 'isalpha')
855
856
857@array_function_dispatch(_unary_op_dispatcher)
858def isdigit(a):
859 """
860 Returns true for each element if all characters in the string are
861 digits and there is at least one character, false otherwise.
862
863 Calls `str.isdigit` element-wise.
864
865 For 8-bit strings, this method is locale-dependent.
866
867 Parameters
868 ----------
869 a : array_like of str or unicode
870
871 Returns
872 -------
873 out : ndarray
874 Output array of bools
875
876 See Also
877 --------
878 str.isdigit
879
880 Examples
881 --------
882 >>> a = np.array(['a', 'b', '0'])
883 >>> np.char.isdigit(a)
884 array([False, False, True])
885 >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
886 >>> np.char.isdigit(a)
887 array([[False, False, True], [False, True, True]])
888 """
889 return _vec_string(a, bool_, 'isdigit')
890
891
892@array_function_dispatch(_unary_op_dispatcher)
893def islower(a):
894 """
895 Returns true for each element if all cased characters in the
896 string are lowercase and there is at least one cased character,
897 false otherwise.
898
899 Calls `str.islower` element-wise.
900
901 For 8-bit strings, this method is locale-dependent.
902
903 Parameters
904 ----------
905 a : array_like of str or unicode
906
907 Returns
908 -------
909 out : ndarray
910 Output array of bools
911
912 See Also
913 --------
914 str.islower
915 """
916 return _vec_string(a, bool_, 'islower')
917
918
919@array_function_dispatch(_unary_op_dispatcher)
920def isspace(a):
921 """
922 Returns true for each element if there are only whitespace
923 characters in the string and there is at least one character,
924 false otherwise.
925
926 Calls `str.isspace` element-wise.
927
928 For 8-bit strings, this method is locale-dependent.
929
930 Parameters
931 ----------
932 a : array_like of str or unicode
933
934 Returns
935 -------
936 out : ndarray
937 Output array of bools
938
939 See Also
940 --------
941 str.isspace
942 """
943 return _vec_string(a, bool_, 'isspace')
944
945
946@array_function_dispatch(_unary_op_dispatcher)
947def istitle(a):
948 """
949 Returns true for each element if the element is a titlecased
950 string and there is at least one character, false otherwise.
951
952 Call `str.istitle` element-wise.
953
954 For 8-bit strings, this method is locale-dependent.
955
956 Parameters
957 ----------
958 a : array_like of str or unicode
959
960 Returns
961 -------
962 out : ndarray
963 Output array of bools
964
965 See Also
966 --------
967 str.istitle
968 """
969 return _vec_string(a, bool_, 'istitle')
970
971
972@array_function_dispatch(_unary_op_dispatcher)
973def isupper(a):
974 """
975 Return true for each element if all cased characters in the
976 string are uppercase and there is at least one character, false
977 otherwise.
978
979 Call `str.isupper` element-wise.
980
981 For 8-bit strings, this method is locale-dependent.
982
983 Parameters
984 ----------
985 a : array_like of str or unicode
986
987 Returns
988 -------
989 out : ndarray
990 Output array of bools
991
992 See Also
993 --------
994 str.isupper
995
996 Examples
997 --------
998 >>> str = "GHC"
999 >>> np.char.isupper(str)
1000 array(True)
1001 >>> a = np.array(["hello", "HELLO", "Hello"])
1002 >>> np.char.isupper(a)
1003 array([False, True, False])
1004
1005 """
1006 return _vec_string(a, bool_, 'isupper')
1007
1008
1009def _join_dispatcher(sep, seq):
1010 return (sep, seq)
1011
1012
1013@array_function_dispatch(_join_dispatcher)
1014def join(sep, seq):
1015 """
1016 Return a string which is the concatenation of the strings in the
1017 sequence `seq`.
1018
1019 Calls `str.join` element-wise.
1020
1021 Parameters
1022 ----------
1023 sep : array_like of str or unicode
1024 seq : array_like of str or unicode
1025
1026 Returns
1027 -------
1028 out : ndarray
1029 Output array of str or unicode, depending on input types
1030
1031 See Also
1032 --------
1033 str.join
1034
1035 Examples
1036 --------
1037 >>> np.char.join('-', 'osd')
1038 array('o-s-d', dtype='<U5')
1039
1040 >>> np.char.join(['-', '.'], ['ghc', 'osd'])
1041 array(['g-h-c', 'o.s.d'], dtype='<U5')
1042
1043 """
1044 return _to_string_or_unicode_array(
1045 _vec_string(sep, object_, 'join', (seq,)))
1046
1047
1048
1049def _just_dispatcher(a, width, fillchar=None):
1050 return (a,)
1051
1052
1053@array_function_dispatch(_just_dispatcher)
1054def ljust(a, width, fillchar=' '):
1055 """
1056 Return an array with the elements of `a` left-justified in a
1057 string of length `width`.
1058
1059 Calls `str.ljust` element-wise.
1060
1061 Parameters
1062 ----------
1063 a : array_like of str or unicode
1064
1065 width : int
1066 The length of the resulting strings
1067 fillchar : str or unicode, optional
1068 The character to use for padding
1069
1070 Returns
1071 -------
1072 out : ndarray
1073 Output array of str or unicode, depending on input type
1074
1075 See Also
1076 --------
1077 str.ljust
1078
1079 """
1080 a_arr = numpy.asarray(a)
1081 width_arr = numpy.asarray(width)
1082 size = int(numpy.max(width_arr.flat))
1083 if numpy.issubdtype(a_arr.dtype, numpy.string_):
1084 fillchar = asbytes(fillchar)
1085 return _vec_string(
1086 a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
1087
1088
1089@array_function_dispatch(_unary_op_dispatcher)
1090def lower(a):
1091 """
1092 Return an array with the elements converted to lowercase.
1093
1094 Call `str.lower` element-wise.
1095
1096 For 8-bit strings, this method is locale-dependent.
1097
1098 Parameters
1099 ----------
1100 a : array_like, {str, unicode}
1101 Input array.
1102
1103 Returns
1104 -------
1105 out : ndarray, {str, unicode}
1106 Output array of str or unicode, depending on input type
1107
1108 See Also
1109 --------
1110 str.lower
1111
1112 Examples
1113 --------
1114 >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
1115 array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
1116 >>> np.char.lower(c)
1117 array(['a1b c', '1bca', 'bca1'], dtype='<U5')
1118
1119 """
1120 a_arr = numpy.asarray(a)
1121 return _vec_string(a_arr, a_arr.dtype, 'lower')
1122
1123
1124def _strip_dispatcher(a, chars=None):
1125 return (a,)
1126
1127
1128@array_function_dispatch(_strip_dispatcher)
1129def lstrip(a, chars=None):
1130 """
1131 For each element in `a`, return a copy with the leading characters
1132 removed.
1133
1134 Calls `str.lstrip` element-wise.
1135
1136 Parameters
1137 ----------
1138 a : array-like, {str, unicode}
1139 Input array.
1140
1141 chars : {str, unicode}, optional
1142 The `chars` argument is a string specifying the set of
1143 characters to be removed. If omitted or None, the `chars`
1144 argument defaults to removing whitespace. The `chars` argument
1145 is not a prefix; rather, all combinations of its values are
1146 stripped.
1147
1148 Returns
1149 -------
1150 out : ndarray, {str, unicode}
1151 Output array of str or unicode, depending on input type
1152
1153 See Also
1154 --------
1155 str.lstrip
1156
1157 Examples
1158 --------
1159 >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
1160 >>> c
1161 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
1162
1163 The 'a' variable is unstripped from c[1] because whitespace leading.
1164
1165 >>> np.char.lstrip(c, 'a')
1166 array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
1167
1168
1169 >>> np.char.lstrip(c, 'A') # leaves c unchanged
1170 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
1171 >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
1172 ... # XXX: is this a regression? This used to return True
1173 ... # np.char.lstrip(c,'') does not modify c at all.
1174 False
1175 >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
1176 True
1177
1178 """
1179 a_arr = numpy.asarray(a)
1180 return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
1181
1182
1183def _partition_dispatcher(a, sep):
1184 return (a,)
1185
1186
1187@array_function_dispatch(_partition_dispatcher)
1188def partition(a, sep):
1189 """
1190 Partition each element in `a` around `sep`.
1191
1192 Calls `str.partition` element-wise.
1193
1194 For each element in `a`, split the element as the first
1195 occurrence of `sep`, and return 3 strings containing the part
1196 before the separator, the separator itself, and the part after
1197 the separator. If the separator is not found, return 3 strings
1198 containing the string itself, followed by two empty strings.
1199
1200 Parameters
1201 ----------
1202 a : array_like, {str, unicode}
1203 Input array
1204 sep : {str, unicode}
1205 Separator to split each string element in `a`.
1206
1207 Returns
1208 -------
1209 out : ndarray, {str, unicode}
1210 Output array of str or unicode, depending on input type.
1211 The output array will have an extra dimension with 3
1212 elements per input element.
1213
1214 See Also
1215 --------
1216 str.partition
1217
1218 """
1219 return _to_string_or_unicode_array(
1220 _vec_string(a, object_, 'partition', (sep,)))
1221
1222
1223def _replace_dispatcher(a, old, new, count=None):
1224 return (a,)
1225
1226
1227@array_function_dispatch(_replace_dispatcher)
1228def replace(a, old, new, count=None):
1229 """
1230 For each element in `a`, return a copy of the string with all
1231 occurrences of substring `old` replaced by `new`.
1232
1233 Calls `str.replace` element-wise.
1234
1235 Parameters
1236 ----------
1237 a : array-like of str or unicode
1238
1239 old, new : str or unicode
1240
1241 count : int, optional
1242 If the optional argument `count` is given, only the first
1243 `count` occurrences are replaced.
1244
1245 Returns
1246 -------
1247 out : ndarray
1248 Output array of str or unicode, depending on input type
1249
1250 See Also
1251 --------
1252 str.replace
1253
1254 Examples
1255 --------
1256 >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
1257 >>> np.char.replace(a, 'mango', 'banana')
1258 array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')
1259
1260 >>> a = np.array(["The dish is fresh", "This is it"])
1261 >>> np.char.replace(a, 'is', 'was')
1262 array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
1263 """
1264 return _to_string_or_unicode_array(
1265 _vec_string(
1266 a, object_, 'replace', [old, new] + _clean_args(count)))
1267
1268
1269@array_function_dispatch(_count_dispatcher)
1270def rfind(a, sub, start=0, end=None):
1271 """
1272 For each element in `a`, return the highest index in the string
1273 where substring `sub` is found, such that `sub` is contained
1274 within [`start`, `end`].
1275
1276 Calls `str.rfind` element-wise.
1277
1278 Parameters
1279 ----------
1280 a : array-like of str or unicode
1281
1282 sub : str or unicode
1283
1284 start, end : int, optional
1285 Optional arguments `start` and `end` are interpreted as in
1286 slice notation.
1287
1288 Returns
1289 -------
1290 out : ndarray
1291 Output array of ints. Return -1 on failure.
1292
1293 See Also
1294 --------
1295 str.rfind
1296
1297 """
1298 return _vec_string(
1299 a, int_, 'rfind', [sub, start] + _clean_args(end))
1300
1301
1302@array_function_dispatch(_count_dispatcher)
1303def rindex(a, sub, start=0, end=None):
1304 """
1305 Like `rfind`, but raises `ValueError` when the substring `sub` is
1306 not found.
1307
1308 Calls `str.rindex` element-wise.
1309
1310 Parameters
1311 ----------
1312 a : array-like of str or unicode
1313
1314 sub : str or unicode
1315
1316 start, end : int, optional
1317
1318 Returns
1319 -------
1320 out : ndarray
1321 Output array of ints.
1322
1323 See Also
1324 --------
1325 rfind, str.rindex
1326
1327 """
1328 return _vec_string(
1329 a, int_, 'rindex', [sub, start] + _clean_args(end))
1330
1331
1332@array_function_dispatch(_just_dispatcher)
1333def rjust(a, width, fillchar=' '):
1334 """
1335 Return an array with the elements of `a` right-justified in a
1336 string of length `width`.
1337
1338 Calls `str.rjust` element-wise.
1339
1340 Parameters
1341 ----------
1342 a : array_like of str or unicode
1343
1344 width : int
1345 The length of the resulting strings
1346 fillchar : str or unicode, optional
1347 The character to use for padding
1348
1349 Returns
1350 -------
1351 out : ndarray
1352 Output array of str or unicode, depending on input type
1353
1354 See Also
1355 --------
1356 str.rjust
1357
1358 """
1359 a_arr = numpy.asarray(a)
1360 width_arr = numpy.asarray(width)
1361 size = int(numpy.max(width_arr.flat))
1362 if numpy.issubdtype(a_arr.dtype, numpy.string_):
1363 fillchar = asbytes(fillchar)
1364 return _vec_string(
1365 a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
1366
1367
1368@array_function_dispatch(_partition_dispatcher)
1369def rpartition(a, sep):
1370 """
1371 Partition (split) each element around the right-most separator.
1372
1373 Calls `str.rpartition` element-wise.
1374
1375 For each element in `a`, split the element as the last
1376 occurrence of `sep`, and return 3 strings containing the part
1377 before the separator, the separator itself, and the part after
1378 the separator. If the separator is not found, return 3 strings
1379 containing the string itself, followed by two empty strings.
1380
1381 Parameters
1382 ----------
1383 a : array_like of str or unicode
1384 Input array
1385 sep : str or unicode
1386 Right-most separator to split each element in array.
1387
1388 Returns
1389 -------
1390 out : ndarray
1391 Output array of string or unicode, depending on input
1392 type. The output array will have an extra dimension with
1393 3 elements per input element.
1394
1395 See Also
1396 --------
1397 str.rpartition
1398
1399 """
1400 return _to_string_or_unicode_array(
1401 _vec_string(a, object_, 'rpartition', (sep,)))
1402
1403
1404def _split_dispatcher(a, sep=None, maxsplit=None):
1405 return (a,)
1406
1407
1408@array_function_dispatch(_split_dispatcher)
1409def rsplit(a, sep=None, maxsplit=None):
1410 """
1411 For each element in `a`, return a list of the words in the
1412 string, using `sep` as the delimiter string.
1413
1414 Calls `str.rsplit` element-wise.
1415
1416 Except for splitting from the right, `rsplit`
1417 behaves like `split`.
1418
1419 Parameters
1420 ----------
1421 a : array_like of str or unicode
1422
1423 sep : str or unicode, optional
1424 If `sep` is not specified or None, any whitespace string
1425 is a separator.
1426 maxsplit : int, optional
1427 If `maxsplit` is given, at most `maxsplit` splits are done,
1428 the rightmost ones.
1429
1430 Returns
1431 -------
1432 out : ndarray
1433 Array of list objects
1434
1435 See Also
1436 --------
1437 str.rsplit, split
1438
1439 """
1440 # This will return an array of lists of different sizes, so we
1441 # leave it as an object array
1442 return _vec_string(
1443 a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
1444
1445
1446def _strip_dispatcher(a, chars=None):
1447 return (a,)
1448
1449
1450@array_function_dispatch(_strip_dispatcher)
1451def rstrip(a, chars=None):
1452 """
1453 For each element in `a`, return a copy with the trailing
1454 characters removed.
1455
1456 Calls `str.rstrip` element-wise.
1457
1458 Parameters
1459 ----------
1460 a : array-like of str or unicode
1461
1462 chars : str or unicode, optional
1463 The `chars` argument is a string specifying the set of
1464 characters to be removed. If omitted or None, the `chars`
1465 argument defaults to removing whitespace. The `chars` argument
1466 is not a suffix; rather, all combinations of its values are
1467 stripped.
1468
1469 Returns
1470 -------
1471 out : ndarray
1472 Output array of str or unicode, depending on input type
1473
1474 See Also
1475 --------
1476 str.rstrip
1477
1478 Examples
1479 --------
1480 >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
1481 array(['aAaAaA', 'abBABba'],
1482 dtype='|S7')
1483 >>> np.char.rstrip(c, b'a')
1484 array(['aAaAaA', 'abBABb'],
1485 dtype='|S7')
1486 >>> np.char.rstrip(c, b'A')
1487 array(['aAaAa', 'abBABba'],
1488 dtype='|S7')
1489
1490 """
1491 a_arr = numpy.asarray(a)
1492 return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
1493
1494
1495@array_function_dispatch(_split_dispatcher)
1496def split(a, sep=None, maxsplit=None):
1497 """
1498 For each element in `a`, return a list of the words in the
1499 string, using `sep` as the delimiter string.
1500
1501 Calls `str.split` element-wise.
1502
1503 Parameters
1504 ----------
1505 a : array_like of str or unicode
1506
1507 sep : str or unicode, optional
1508 If `sep` is not specified or None, any whitespace string is a
1509 separator.
1510
1511 maxsplit : int, optional
1512 If `maxsplit` is given, at most `maxsplit` splits are done.
1513
1514 Returns
1515 -------
1516 out : ndarray
1517 Array of list objects
1518
1519 See Also
1520 --------
1521 str.split, rsplit
1522
1523 """
1524 # This will return an array of lists of different sizes, so we
1525 # leave it as an object array
1526 return _vec_string(
1527 a, object_, 'split', [sep] + _clean_args(maxsplit))
1528
1529
1530def _splitlines_dispatcher(a, keepends=None):
1531 return (a,)
1532
1533
1534@array_function_dispatch(_splitlines_dispatcher)
1535def splitlines(a, keepends=None):
1536 """
1537 For each element in `a`, return a list of the lines in the
1538 element, breaking at line boundaries.
1539
1540 Calls `str.splitlines` element-wise.
1541
1542 Parameters
1543 ----------
1544 a : array_like of str or unicode
1545
1546 keepends : bool, optional
1547 Line breaks are not included in the resulting list unless
1548 keepends is given and true.
1549
1550 Returns
1551 -------
1552 out : ndarray
1553 Array of list objects
1554
1555 See Also
1556 --------
1557 str.splitlines
1558
1559 """
1560 return _vec_string(
1561 a, object_, 'splitlines', _clean_args(keepends))
1562
1563
1564def _startswith_dispatcher(a, prefix, start=None, end=None):
1565 return (a,)
1566
1567
1568@array_function_dispatch(_startswith_dispatcher)
1569def startswith(a, prefix, start=0, end=None):
1570 """
1571 Returns a boolean array which is `True` where the string element
1572 in `a` starts with `prefix`, otherwise `False`.
1573
1574 Calls `str.startswith` element-wise.
1575
1576 Parameters
1577 ----------
1578 a : array_like of str or unicode
1579
1580 prefix : str
1581
1582 start, end : int, optional
1583 With optional `start`, test beginning at that position. With
1584 optional `end`, stop comparing at that position.
1585
1586 Returns
1587 -------
1588 out : ndarray
1589 Array of booleans
1590
1591 See Also
1592 --------
1593 str.startswith
1594
1595 """
1596 return _vec_string(
1597 a, bool_, 'startswith', [prefix, start] + _clean_args(end))
1598
1599
1600@array_function_dispatch(_strip_dispatcher)
1601def strip(a, chars=None):
1602 """
1603 For each element in `a`, return a copy with the leading and
1604 trailing characters removed.
1605
1606 Calls `str.strip` element-wise.
1607
1608 Parameters
1609 ----------
1610 a : array-like of str or unicode
1611
1612 chars : str or unicode, optional
1613 The `chars` argument is a string specifying the set of
1614 characters to be removed. If omitted or None, the `chars`
1615 argument defaults to removing whitespace. The `chars` argument
1616 is not a prefix or suffix; rather, all combinations of its
1617 values are stripped.
1618
1619 Returns
1620 -------
1621 out : ndarray
1622 Output array of str or unicode, depending on input type
1623
1624 See Also
1625 --------
1626 str.strip
1627
1628 Examples
1629 --------
1630 >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
1631 >>> c
1632 array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
1633 >>> np.char.strip(c)
1634 array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
1635 >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
1636 array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
1637 >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
1638 array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')
1639
1640 """
1641 a_arr = numpy.asarray(a)
1642 return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
1643
1644
1645@array_function_dispatch(_unary_op_dispatcher)
1646def swapcase(a):
1647 """
1648 Return element-wise a copy of the string with
1649 uppercase characters converted to lowercase and vice versa.
1650
1651 Calls `str.swapcase` element-wise.
1652
1653 For 8-bit strings, this method is locale-dependent.
1654
1655 Parameters
1656 ----------
1657 a : array_like, {str, unicode}
1658 Input array.
1659
1660 Returns
1661 -------
1662 out : ndarray, {str, unicode}
1663 Output array of str or unicode, depending on input type
1664
1665 See Also
1666 --------
1667 str.swapcase
1668
1669 Examples
1670 --------
1671 >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
1672 array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
1673 dtype='|S5')
1674 >>> np.char.swapcase(c)
1675 array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
1676 dtype='|S5')
1677
1678 """
1679 a_arr = numpy.asarray(a)
1680 return _vec_string(a_arr, a_arr.dtype, 'swapcase')
1681
1682
1683@array_function_dispatch(_unary_op_dispatcher)
1684def title(a):
1685 """
1686 Return element-wise title cased version of string or unicode.
1687
1688 Title case words start with uppercase characters, all remaining cased
1689 characters are lowercase.
1690
1691 Calls `str.title` element-wise.
1692
1693 For 8-bit strings, this method is locale-dependent.
1694
1695 Parameters
1696 ----------
1697 a : array_like, {str, unicode}
1698 Input array.
1699
1700 Returns
1701 -------
1702 out : ndarray
1703 Output array of str or unicode, depending on input type
1704
1705 See Also
1706 --------
1707 str.title
1708
1709 Examples
1710 --------
1711 >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
1712 array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
1713 dtype='|S5')
1714 >>> np.char.title(c)
1715 array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
1716 dtype='|S5')
1717
1718 """
1719 a_arr = numpy.asarray(a)
1720 return _vec_string(a_arr, a_arr.dtype, 'title')
1721
1722
1723def _translate_dispatcher(a, table, deletechars=None):
1724 return (a,)
1725
1726
1727@array_function_dispatch(_translate_dispatcher)
1728def translate(a, table, deletechars=None):
1729 """
1730 For each element in `a`, return a copy of the string where all
1731 characters occurring in the optional argument `deletechars` are
1732 removed, and the remaining characters have been mapped through the
1733 given translation table.
1734
1735 Calls `str.translate` element-wise.
1736
1737 Parameters
1738 ----------
1739 a : array-like of str or unicode
1740
1741 table : str of length 256
1742
1743 deletechars : str
1744
1745 Returns
1746 -------
1747 out : ndarray
1748 Output array of str or unicode, depending on input type
1749
1750 See Also
1751 --------
1752 str.translate
1753
1754 """
1755 a_arr = numpy.asarray(a)
1756 if issubclass(a_arr.dtype.type, unicode_):
1757 return _vec_string(
1758 a_arr, a_arr.dtype, 'translate', (table,))
1759 else:
1760 return _vec_string(
1761 a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
1762
1763
1764@array_function_dispatch(_unary_op_dispatcher)
1765def upper(a):
1766 """
1767 Return an array with the elements converted to uppercase.
1768
1769 Calls `str.upper` element-wise.
1770
1771 For 8-bit strings, this method is locale-dependent.
1772
1773 Parameters
1774 ----------
1775 a : array_like, {str, unicode}
1776 Input array.
1777
1778 Returns
1779 -------
1780 out : ndarray, {str, unicode}
1781 Output array of str or unicode, depending on input type
1782
1783 See Also
1784 --------
1785 str.upper
1786
1787 Examples
1788 --------
1789 >>> c = np.array(['a1b c', '1bca', 'bca1']); c
1790 array(['a1b c', '1bca', 'bca1'], dtype='<U5')
1791 >>> np.char.upper(c)
1792 array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
1793
1794 """
1795 a_arr = numpy.asarray(a)
1796 return _vec_string(a_arr, a_arr.dtype, 'upper')
1797
1798
1799def _zfill_dispatcher(a, width):
1800 return (a,)
1801
1802
1803@array_function_dispatch(_zfill_dispatcher)
1804def zfill(a, width):
1805 """
1806 Return the numeric string left-filled with zeros
1807
1808 Calls `str.zfill` element-wise.
1809
1810 Parameters
1811 ----------
1812 a : array_like, {str, unicode}
1813 Input array.
1814 width : int
1815 Width of string to left-fill elements in `a`.
1816
1817 Returns
1818 -------
1819 out : ndarray, {str, unicode}
1820 Output array of str or unicode, depending on input type
1821
1822 See Also
1823 --------
1824 str.zfill
1825
1826 """
1827 a_arr = numpy.asarray(a)
1828 width_arr = numpy.asarray(width)
1829 size = int(numpy.max(width_arr.flat))
1830 return _vec_string(
1831 a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
1832
1833
1834@array_function_dispatch(_unary_op_dispatcher)
1835def isnumeric(a):
1836 """
1837 For each element, return True if there are only numeric
1838 characters in the element.
1839
1840 Calls `unicode.isnumeric` element-wise.
1841
1842 Numeric characters include digit characters, and all characters
1843 that have the Unicode numeric value property, e.g. ``U+2155,
1844 VULGAR FRACTION ONE FIFTH``.
1845
1846 Parameters
1847 ----------
1848 a : array_like, unicode
1849 Input array.
1850
1851 Returns
1852 -------
1853 out : ndarray, bool
1854 Array of booleans of same shape as `a`.
1855
1856 See Also
1857 --------
1858 unicode.isnumeric
1859
1860 Examples
1861 --------
1862 >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
1863 array([ True, False, False, False, False])
1864
1865 """
1866 if _use_unicode(a) != unicode_:
1867 raise TypeError("isnumeric is only available for Unicode strings and arrays")
1868 return _vec_string(a, bool_, 'isnumeric')
1869
1870
1871@array_function_dispatch(_unary_op_dispatcher)
1872def isdecimal(a):
1873 """
1874 For each element, return True if there are only decimal
1875 characters in the element.
1876
1877 Calls `unicode.isdecimal` element-wise.
1878
1879 Decimal characters include digit characters, and all characters
1880 that can be used to form decimal-radix numbers,
1881 e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
1882
1883 Parameters
1884 ----------
1885 a : array_like, unicode
1886 Input array.
1887
1888 Returns
1889 -------
1890 out : ndarray, bool
1891 Array of booleans identical in shape to `a`.
1892
1893 See Also
1894 --------
1895 unicode.isdecimal
1896
1897 Examples
1898 --------
1899 >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
1900 array([ True, False, False, False])
1901
1902 """
1903 if _use_unicode(a) != unicode_:
1904 raise TypeError("isnumeric is only available for Unicode strings and arrays")
1905 return _vec_string(a, bool_, 'isdecimal')
1906
1907
1908@set_module('numpy')
1909class chararray(ndarray):
1910 """
1911 chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
1912 strides=None, order=None)
1913
1914 Provides a convenient view on arrays of string and unicode values.
1915
1916 .. note::
1917 The `chararray` class exists for backwards compatibility with
1918 Numarray, it is not recommended for new development. Starting from numpy
1919 1.4, if one needs arrays of strings, it is recommended to use arrays of
1920 `dtype` `object_`, `string_` or `unicode_`, and use the free functions
1921 in the `numpy.char` module for fast vectorized string operations.
1922
1923 Versus a regular NumPy array of type `str` or `unicode`, this
1924 class adds the following functionality:
1925
1926 1) values automatically have whitespace removed from the end
1927 when indexed
1928
1929 2) comparison operators automatically remove whitespace from the
1930 end when comparing values
1931
1932 3) vectorized string operations are provided as methods
1933 (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
1934
1935 chararrays should be created using `numpy.char.array` or
1936 `numpy.char.asarray`, rather than this constructor directly.
1937
1938 This constructor creates the array, using `buffer` (with `offset`
1939 and `strides`) if it is not ``None``. If `buffer` is ``None``, then
1940 constructs a new array with `strides` in "C order", unless both
1941 ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
1942 is in "Fortran order".
1943
1944 Methods
1945 -------
1946 astype
1947 argsort
1948 copy
1949 count
1950 decode
1951 dump
1952 dumps
1953 encode
1954 endswith
1955 expandtabs
1956 fill
1957 find
1958 flatten
1959 getfield
1960 index
1961 isalnum
1962 isalpha
1963 isdecimal
1964 isdigit
1965 islower
1966 isnumeric
1967 isspace
1968 istitle
1969 isupper
1970 item
1971 join
1972 ljust
1973 lower
1974 lstrip
1975 nonzero
1976 put
1977 ravel
1978 repeat
1979 replace
1980 reshape
1981 resize
1982 rfind
1983 rindex
1984 rjust
1985 rsplit
1986 rstrip
1987 searchsorted
1988 setfield
1989 setflags
1990 sort
1991 split
1992 splitlines
1993 squeeze
1994 startswith
1995 strip
1996 swapaxes
1997 swapcase
1998 take
1999 title
2000 tofile
2001 tolist
2002 tostring
2003 translate
2004 transpose
2005 upper
2006 view
2007 zfill
2008
2009 Parameters
2010 ----------
2011 shape : tuple
2012 Shape of the array.
2013 itemsize : int, optional
2014 Length of each array element, in number of characters. Default is 1.
2015 unicode : bool, optional
2016 Are the array elements of type unicode (True) or string (False).
2017 Default is False.
2018 buffer : object exposing the buffer interface or str, optional
2019 Memory address of the start of the array data. Default is None,
2020 in which case a new array is created.
2021 offset : int, optional
2022 Fixed stride displacement from the beginning of an axis?
2023 Default is 0. Needs to be >=0.
2024 strides : array_like of ints, optional
2025 Strides for the array (see `ndarray.strides` for full description).
2026 Default is None.
2027 order : {'C', 'F'}, optional
2028 The order in which the array data is stored in memory: 'C' ->
2029 "row major" order (the default), 'F' -> "column major"
2030 (Fortran) order.
2031
2032 Examples
2033 --------
2034 >>> charar = np.chararray((3, 3))
2035 >>> charar[:] = 'a'
2036 >>> charar
2037 chararray([[b'a', b'a', b'a'],
2038 [b'a', b'a', b'a'],
2039 [b'a', b'a', b'a']], dtype='|S1')
2040
2041 >>> charar = np.chararray(charar.shape, itemsize=5)
2042 >>> charar[:] = 'abc'
2043 >>> charar
2044 chararray([[b'abc', b'abc', b'abc'],
2045 [b'abc', b'abc', b'abc'],
2046 [b'abc', b'abc', b'abc']], dtype='|S5')
2047
2048 """
2049 def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
2050 offset=0, strides=None, order='C'):
2051 global _globalvar
2052
2053 if unicode:
2054 dtype = unicode_
2055 else:
2056 dtype = string_
2057
2058 # force itemsize to be a Python int, since using NumPy integer
2059 # types results in itemsize.itemsize being used as the size of
2060 # strings in the new array.
2061 itemsize = int(itemsize)
2062
2063 if isinstance(buffer, str):
2064 # unicode objects do not have the buffer interface
2065 filler = buffer
2066 buffer = None
2067 else:
2068 filler = None
2069
2070 _globalvar = 1
2071 if buffer is None:
2072 self = ndarray.__new__(subtype, shape, (dtype, itemsize),
2073 order=order)
2074 else:
2075 self = ndarray.__new__(subtype, shape, (dtype, itemsize),
2076 buffer=buffer,
2077 offset=offset, strides=strides,
2078 order=order)
2079 if filler is not None:
2080 self[...] = filler
2081 _globalvar = 0
2082 return self
2083
2084 def __array_finalize__(self, obj):
2085 # The b is a special case because it is used for reconstructing.
2086 if not _globalvar and self.dtype.char not in 'SUbc':
2087 raise ValueError("Can only create a chararray from string data.")
2088
2089 def __getitem__(self, obj):
2090 val = ndarray.__getitem__(self, obj)
2091
2092 if isinstance(val, character):
2093 temp = val.rstrip()
2094 if len(temp) == 0:
2095 val = ''
2096 else:
2097 val = temp
2098
2099 return val
2100
2101 # IMPLEMENTATION NOTE: Most of the methods of this class are
2102 # direct delegations to the free functions in this module.
2103 # However, those that return an array of strings should instead
2104 # return a chararray, so some extra wrapping is required.
2105
2106 def __eq__(self, other):
2107 """
2108 Return (self == other) element-wise.
2109
2110 See Also
2111 --------
2112 equal
2113 """
2114 return equal(self, other)
2115
2116 def __ne__(self, other):
2117 """
2118 Return (self != other) element-wise.
2119
2120 See Also
2121 --------
2122 not_equal
2123 """
2124 return not_equal(self, other)
2125
2126 def __ge__(self, other):
2127 """
2128 Return (self >= other) element-wise.
2129
2130 See Also
2131 --------
2132 greater_equal
2133 """
2134 return greater_equal(self, other)
2135
2136 def __le__(self, other):
2137 """
2138 Return (self <= other) element-wise.
2139
2140 See Also
2141 --------
2142 less_equal
2143 """
2144 return less_equal(self, other)
2145
2146 def __gt__(self, other):
2147 """
2148 Return (self > other) element-wise.
2149
2150 See Also
2151 --------
2152 greater
2153 """
2154 return greater(self, other)
2155
2156 def __lt__(self, other):
2157 """
2158 Return (self < other) element-wise.
2159
2160 See Also
2161 --------
2162 less
2163 """
2164 return less(self, other)
2165
2166 def __add__(self, other):
2167 """
2168 Return (self + other), that is string concatenation,
2169 element-wise for a pair of array_likes of str or unicode.
2170
2171 See Also
2172 --------
2173 add
2174 """
2175 return asarray(add(self, other))
2176
2177 def __radd__(self, other):
2178 """
2179 Return (other + self), that is string concatenation,
2180 element-wise for a pair of array_likes of `string_` or `unicode_`.
2181
2182 See Also
2183 --------
2184 add
2185 """
2186 return asarray(add(numpy.asarray(other), self))
2187
2188 def __mul__(self, i):
2189 """
2190 Return (self * i), that is string multiple concatenation,
2191 element-wise.
2192
2193 See Also
2194 --------
2195 multiply
2196 """
2197 return asarray(multiply(self, i))
2198
2199 def __rmul__(self, i):
2200 """
2201 Return (self * i), that is string multiple concatenation,
2202 element-wise.
2203
2204 See Also
2205 --------
2206 multiply
2207 """
2208 return asarray(multiply(self, i))
2209
2210 def __mod__(self, i):
2211 """
2212 Return (self % i), that is pre-Python 2.6 string formatting
2213 (interpolation), element-wise for a pair of array_likes of `string_`
2214 or `unicode_`.
2215
2216 See Also
2217 --------
2218 mod
2219 """
2220 return asarray(mod(self, i))
2221
2222 def __rmod__(self, other):
2223 return NotImplemented
2224
2225 def argsort(self, axis=-1, kind=None, order=None):
2226 """
2227 Return the indices that sort the array lexicographically.
2228
2229 For full documentation see `numpy.argsort`, for which this method is
2230 in fact merely a "thin wrapper."
2231
2232 Examples
2233 --------
2234 >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
2235 >>> c = c.view(np.chararray); c
2236 chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
2237 dtype='|S5')
2238 >>> c[c.argsort()]
2239 chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
2240 dtype='|S5')
2241
2242 """
2243 return self.__array__().argsort(axis, kind, order)
2244 argsort.__doc__ = ndarray.argsort.__doc__
2245
2246 def capitalize(self):
2247 """
2248 Return a copy of `self` with only the first character of each element
2249 capitalized.
2250
2251 See Also
2252 --------
2253 char.capitalize
2254
2255 """
2256 return asarray(capitalize(self))
2257
2258 def center(self, width, fillchar=' '):
2259 """
2260 Return a copy of `self` with its elements centered in a
2261 string of length `width`.
2262
2263 See Also
2264 --------
2265 center
2266 """
2267 return asarray(center(self, width, fillchar))
2268
2269 def count(self, sub, start=0, end=None):
2270 """
2271 Returns an array with the number of non-overlapping occurrences of
2272 substring `sub` in the range [`start`, `end`].
2273
2274 See Also
2275 --------
2276 char.count
2277
2278 """
2279 return count(self, sub, start, end)
2280
2281 def decode(self, encoding=None, errors=None):
2282 """
2283 Calls ``bytes.decode`` element-wise.
2284
2285 See Also
2286 --------
2287 char.decode
2288
2289 """
2290 return decode(self, encoding, errors)
2291
2292 def encode(self, encoding=None, errors=None):
2293 """
2294 Calls `str.encode` element-wise.
2295
2296 See Also
2297 --------
2298 char.encode
2299
2300 """
2301 return encode(self, encoding, errors)
2302
2303 def endswith(self, suffix, start=0, end=None):
2304 """
2305 Returns a boolean array which is `True` where the string element
2306 in `self` ends with `suffix`, otherwise `False`.
2307
2308 See Also
2309 --------
2310 char.endswith
2311
2312 """
2313 return endswith(self, suffix, start, end)
2314
2315 def expandtabs(self, tabsize=8):
2316 """
2317 Return a copy of each string element where all tab characters are
2318 replaced by one or more spaces.
2319
2320 See Also
2321 --------
2322 char.expandtabs
2323
2324 """
2325 return asarray(expandtabs(self, tabsize))
2326
2327 def find(self, sub, start=0, end=None):
2328 """
2329 For each element, return the lowest index in the string where
2330 substring `sub` is found.
2331
2332 See Also
2333 --------
2334 char.find
2335
2336 """
2337 return find(self, sub, start, end)
2338
2339 def index(self, sub, start=0, end=None):
2340 """
2341 Like `find`, but raises `ValueError` when the substring is not found.
2342
2343 See Also
2344 --------
2345 char.index
2346
2347 """
2348 return index(self, sub, start, end)
2349
2350 def isalnum(self):
2351 """
2352 Returns true for each element if all characters in the string
2353 are alphanumeric and there is at least one character, false
2354 otherwise.
2355
2356 See Also
2357 --------
2358 char.isalnum
2359
2360 """
2361 return isalnum(self)
2362
2363 def isalpha(self):
2364 """
2365 Returns true for each element if all characters in the string
2366 are alphabetic and there is at least one character, false
2367 otherwise.
2368
2369 See Also
2370 --------
2371 char.isalpha
2372
2373 """
2374 return isalpha(self)
2375
2376 def isdigit(self):
2377 """
2378 Returns true for each element if all characters in the string are
2379 digits and there is at least one character, false otherwise.
2380
2381 See Also
2382 --------
2383 char.isdigit
2384
2385 """
2386 return isdigit(self)
2387
2388 def islower(self):
2389 """
2390 Returns true for each element if all cased characters in the
2391 string are lowercase and there is at least one cased character,
2392 false otherwise.
2393
2394 See Also
2395 --------
2396 char.islower
2397
2398 """
2399 return islower(self)
2400
2401 def isspace(self):
2402 """
2403 Returns true for each element if there are only whitespace
2404 characters in the string and there is at least one character,
2405 false otherwise.
2406
2407 See Also
2408 --------
2409 char.isspace
2410
2411 """
2412 return isspace(self)
2413
2414 def istitle(self):
2415 """
2416 Returns true for each element if the element is a titlecased
2417 string and there is at least one character, false otherwise.
2418
2419 See Also
2420 --------
2421 char.istitle
2422
2423 """
2424 return istitle(self)
2425
2426 def isupper(self):
2427 """
2428 Returns true for each element if all cased characters in the
2429 string are uppercase and there is at least one character, false
2430 otherwise.
2431
2432 See Also
2433 --------
2434 char.isupper
2435
2436 """
2437 return isupper(self)
2438
2439 def join(self, seq):
2440 """
2441 Return a string which is the concatenation of the strings in the
2442 sequence `seq`.
2443
2444 See Also
2445 --------
2446 char.join
2447
2448 """
2449 return join(self, seq)
2450
2451 def ljust(self, width, fillchar=' '):
2452 """
2453 Return an array with the elements of `self` left-justified in a
2454 string of length `width`.
2455
2456 See Also
2457 --------
2458 char.ljust
2459
2460 """
2461 return asarray(ljust(self, width, fillchar))
2462
2463 def lower(self):
2464 """
2465 Return an array with the elements of `self` converted to
2466 lowercase.
2467
2468 See Also
2469 --------
2470 char.lower
2471
2472 """
2473 return asarray(lower(self))
2474
2475 def lstrip(self, chars=None):
2476 """
2477 For each element in `self`, return a copy with the leading characters
2478 removed.
2479
2480 See Also
2481 --------
2482 char.lstrip
2483
2484 """
2485 return asarray(lstrip(self, chars))
2486
2487 def partition(self, sep):
2488 """
2489 Partition each element in `self` around `sep`.
2490
2491 See Also
2492 --------
2493 partition
2494 """
2495 return asarray(partition(self, sep))
2496
2497 def replace(self, old, new, count=None):
2498 """
2499 For each element in `self`, return a copy of the string with all
2500 occurrences of substring `old` replaced by `new`.
2501
2502 See Also
2503 --------
2504 char.replace
2505
2506 """
2507 return asarray(replace(self, old, new, count))
2508
2509 def rfind(self, sub, start=0, end=None):
2510 """
2511 For each element in `self`, return the highest index in the string
2512 where substring `sub` is found, such that `sub` is contained
2513 within [`start`, `end`].
2514
2515 See Also
2516 --------
2517 char.rfind
2518
2519 """
2520 return rfind(self, sub, start, end)
2521
2522 def rindex(self, sub, start=0, end=None):
2523 """
2524 Like `rfind`, but raises `ValueError` when the substring `sub` is
2525 not found.
2526
2527 See Also
2528 --------
2529 char.rindex
2530
2531 """
2532 return rindex(self, sub, start, end)
2533
2534 def rjust(self, width, fillchar=' '):
2535 """
2536 Return an array with the elements of `self`
2537 right-justified in a string of length `width`.
2538
2539 See Also
2540 --------
2541 char.rjust
2542
2543 """
2544 return asarray(rjust(self, width, fillchar))
2545
2546 def rpartition(self, sep):
2547 """
2548 Partition each element in `self` around `sep`.
2549
2550 See Also
2551 --------
2552 rpartition
2553 """
2554 return asarray(rpartition(self, sep))
2555
2556 def rsplit(self, sep=None, maxsplit=None):
2557 """
2558 For each element in `self`, return a list of the words in
2559 the string, using `sep` as the delimiter string.
2560
2561 See Also
2562 --------
2563 char.rsplit
2564
2565 """
2566 return rsplit(self, sep, maxsplit)
2567
2568 def rstrip(self, chars=None):
2569 """
2570 For each element in `self`, return a copy with the trailing
2571 characters removed.
2572
2573 See Also
2574 --------
2575 char.rstrip
2576
2577 """
2578 return asarray(rstrip(self, chars))
2579
2580 def split(self, sep=None, maxsplit=None):
2581 """
2582 For each element in `self`, return a list of the words in the
2583 string, using `sep` as the delimiter string.
2584
2585 See Also
2586 --------
2587 char.split
2588
2589 """
2590 return split(self, sep, maxsplit)
2591
2592 def splitlines(self, keepends=None):
2593 """
2594 For each element in `self`, return a list of the lines in the
2595 element, breaking at line boundaries.
2596
2597 See Also
2598 --------
2599 char.splitlines
2600
2601 """
2602 return splitlines(self, keepends)
2603
2604 def startswith(self, prefix, start=0, end=None):
2605 """
2606 Returns a boolean array which is `True` where the string element
2607 in `self` starts with `prefix`, otherwise `False`.
2608
2609 See Also
2610 --------
2611 char.startswith
2612
2613 """
2614 return startswith(self, prefix, start, end)
2615
2616 def strip(self, chars=None):
2617 """
2618 For each element in `self`, return a copy with the leading and
2619 trailing characters removed.
2620
2621 See Also
2622 --------
2623 char.strip
2624
2625 """
2626 return asarray(strip(self, chars))
2627
2628 def swapcase(self):
2629 """
2630 For each element in `self`, return a copy of the string with
2631 uppercase characters converted to lowercase and vice versa.
2632
2633 See Also
2634 --------
2635 char.swapcase
2636
2637 """
2638 return asarray(swapcase(self))
2639
2640 def title(self):
2641 """
2642 For each element in `self`, return a titlecased version of the
2643 string: words start with uppercase characters, all remaining cased
2644 characters are lowercase.
2645
2646 See Also
2647 --------
2648 char.title
2649
2650 """
2651 return asarray(title(self))
2652
2653 def translate(self, table, deletechars=None):
2654 """
2655 For each element in `self`, return a copy of the string where
2656 all characters occurring in the optional argument
2657 `deletechars` are removed, and the remaining characters have
2658 been mapped through the given translation table.
2659
2660 See Also
2661 --------
2662 char.translate
2663
2664 """
2665 return asarray(translate(self, table, deletechars))
2666
2667 def upper(self):
2668 """
2669 Return an array with the elements of `self` converted to
2670 uppercase.
2671
2672 See Also
2673 --------
2674 char.upper
2675
2676 """
2677 return asarray(upper(self))
2678
2679 def zfill(self, width):
2680 """
2681 Return the numeric string left-filled with zeros in a string of
2682 length `width`.
2683
2684 See Also
2685 --------
2686 char.zfill
2687
2688 """
2689 return asarray(zfill(self, width))
2690
2691 def isnumeric(self):
2692 """
2693 For each element in `self`, return True if there are only
2694 numeric characters in the element.
2695
2696 See Also
2697 --------
2698 char.isnumeric
2699
2700 """
2701 return isnumeric(self)
2702
2703 def isdecimal(self):
2704 """
2705 For each element in `self`, return True if there are only
2706 decimal characters in the element.
2707
2708 See Also
2709 --------
2710 char.isdecimal
2711
2712 """
2713 return isdecimal(self)
2714
2715
2716@set_module("numpy.char")
2717def array(obj, itemsize=None, copy=True, unicode=None, order=None):
2718 """
2719 Create a `chararray`.
2720
2721 .. note::
2722 This class is provided for numarray backward-compatibility.
2723 New code (not concerned with numarray compatibility) should use
2724 arrays of type `string_` or `unicode_` and use the free functions
2725 in :mod:`numpy.char <numpy.core.defchararray>` for fast
2726 vectorized string operations instead.
2727
2728 Versus a regular NumPy array of type `str` or `unicode`, this
2729 class adds the following functionality:
2730
2731 1) values automatically have whitespace removed from the end
2732 when indexed
2733
2734 2) comparison operators automatically remove whitespace from the
2735 end when comparing values
2736
2737 3) vectorized string operations are provided as methods
2738 (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)
2739
2740 Parameters
2741 ----------
2742 obj : array of str or unicode-like
2743
2744 itemsize : int, optional
2745 `itemsize` is the number of characters per scalar in the
2746 resulting array. If `itemsize` is None, and `obj` is an
2747 object array or a Python list, the `itemsize` will be
2748 automatically determined. If `itemsize` is provided and `obj`
2749 is of type str or unicode, then the `obj` string will be
2750 chunked into `itemsize` pieces.
2751
2752 copy : bool, optional
2753 If true (default), then the object is copied. Otherwise, a copy
2754 will only be made if __array__ returns a copy, if obj is a
2755 nested sequence, or if a copy is needed to satisfy any of the other
2756 requirements (`itemsize`, unicode, `order`, etc.).
2757
2758 unicode : bool, optional
2759 When true, the resulting `chararray` can contain Unicode
2760 characters, when false only 8-bit characters. If unicode is
2761 None and `obj` is one of the following:
2762
2763 - a `chararray`,
2764 - an ndarray of type `str` or `unicode`
2765 - a Python str or unicode object,
2766
2767 then the unicode setting of the output array will be
2768 automatically determined.
2769
2770 order : {'C', 'F', 'A'}, optional
2771 Specify the order of the array. If order is 'C' (default), then the
2772 array will be in C-contiguous order (last-index varies the
2773 fastest). If order is 'F', then the returned array
2774 will be in Fortran-contiguous order (first-index varies the
2775 fastest). If order is 'A', then the returned array may
2776 be in any order (either C-, Fortran-contiguous, or even
2777 discontiguous).
2778 """
2779 if isinstance(obj, (bytes, str)):
2780 if unicode is None:
2781 if isinstance(obj, str):
2782 unicode = True
2783 else:
2784 unicode = False
2785
2786 if itemsize is None:
2787 itemsize = len(obj)
2788 shape = len(obj) // itemsize
2789
2790 return chararray(shape, itemsize=itemsize, unicode=unicode,
2791 buffer=obj, order=order)
2792
2793 if isinstance(obj, (list, tuple)):
2794 obj = numpy.asarray(obj)
2795
2796 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
2797 # If we just have a vanilla chararray, create a chararray
2798 # view around it.
2799 if not isinstance(obj, chararray):
2800 obj = obj.view(chararray)
2801
2802 if itemsize is None:
2803 itemsize = obj.itemsize
2804 # itemsize is in 8-bit chars, so for Unicode, we need
2805 # to divide by the size of a single Unicode character,
2806 # which for NumPy is always 4
2807 if issubclass(obj.dtype.type, unicode_):
2808 itemsize //= 4
2809
2810 if unicode is None:
2811 if issubclass(obj.dtype.type, unicode_):
2812 unicode = True
2813 else:
2814 unicode = False
2815
2816 if unicode:
2817 dtype = unicode_
2818 else:
2819 dtype = string_
2820
2821 if order is not None:
2822 obj = numpy.asarray(obj, order=order)
2823 if (copy or
2824 (itemsize != obj.itemsize) or
2825 (not unicode and isinstance(obj, unicode_)) or
2826 (unicode and isinstance(obj, string_))):
2827 obj = obj.astype((dtype, int(itemsize)))
2828 return obj
2829
2830 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
2831 if itemsize is None:
2832 # Since no itemsize was specified, convert the input array to
2833 # a list so the ndarray constructor will automatically
2834 # determine the itemsize for us.
2835 obj = obj.tolist()
2836 # Fall through to the default case
2837
2838 if unicode:
2839 dtype = unicode_
2840 else:
2841 dtype = string_
2842
2843 if itemsize is None:
2844 val = narray(obj, dtype=dtype, order=order, subok=True)
2845 else:
2846 val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
2847 return val.view(chararray)
2848
2849
2850@set_module("numpy.char")
2851def asarray(obj, itemsize=None, unicode=None, order=None):
2852 """
2853 Convert the input to a `chararray`, copying the data only if
2854 necessary.
2855
2856 Versus a regular NumPy array of type `str` or `unicode`, this
2857 class adds the following functionality:
2858
2859 1) values automatically have whitespace removed from the end
2860 when indexed
2861
2862 2) comparison operators automatically remove whitespace from the
2863 end when comparing values
2864
2865 3) vectorized string operations are provided as methods
2866 (e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,``%``)
2867
2868 Parameters
2869 ----------
2870 obj : array of str or unicode-like
2871
2872 itemsize : int, optional
2873 `itemsize` is the number of characters per scalar in the
2874 resulting array. If `itemsize` is None, and `obj` is an
2875 object array or a Python list, the `itemsize` will be
2876 automatically determined. If `itemsize` is provided and `obj`
2877 is of type str or unicode, then the `obj` string will be
2878 chunked into `itemsize` pieces.
2879
2880 unicode : bool, optional
2881 When true, the resulting `chararray` can contain Unicode
2882 characters, when false only 8-bit characters. If unicode is
2883 None and `obj` is one of the following:
2884
2885 - a `chararray`,
2886 - an ndarray of type `str` or 'unicode`
2887 - a Python str or unicode object,
2888
2889 then the unicode setting of the output array will be
2890 automatically determined.
2891
2892 order : {'C', 'F'}, optional
2893 Specify the order of the array. If order is 'C' (default), then the
2894 array will be in C-contiguous order (last-index varies the
2895 fastest). If order is 'F', then the returned array
2896 will be in Fortran-contiguous order (first-index varies the
2897 fastest).
2898 """
2899 return array(obj, itemsize, copy=False,
2900 unicode=unicode, order=order)