Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/core/defchararray.py: 44%
443 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
1"""
2This module contains a set of functions for vectorized string
3operations and methods.
5.. note::
6 The `chararray` class exists for backwards compatibility with
7 Numarray, it is not recommended for new development. Starting from numpy
8 1.4, if one needs arrays of strings, it is recommended to use arrays of
9 `dtype` `object_`, `bytes_` or `str_`, and use the free functions
10 in the `numpy.char` module for fast vectorized string operations.
12Some methods will only be available if the corresponding string method is
13available in your version of Python.
15The preferred alias for `defchararray` is `numpy.char`.
17"""
18import functools
20from .._utils import set_module
21from .numerictypes import (
22 bytes_, str_, integer, int_, object_, bool_, character)
23from .numeric import ndarray, compare_chararrays
24from .numeric import array as narray
25from numpy.core.multiarray import _vec_string
26from numpy.core import overrides
27from numpy.compat import asbytes
28import numpy
# Names exported by ``numpy.char``; the order is kept exactly as before.
__all__ = [
    # element-wise comparisons
    'equal', 'not_equal', 'greater_equal', 'less_equal', 'greater', 'less',
    # length and operator-style helpers
    'str_len', 'add', 'multiply', 'mod',
    # vectorized string methods
    'capitalize', 'center', 'count', 'decode', 'encode', 'endswith',
    'expandtabs', 'find', 'index', 'isalnum', 'isalpha', 'isdigit',
    'islower', 'isspace', 'istitle', 'isupper', 'join', 'ljust', 'lower',
    'lstrip', 'partition', 'replace', 'rfind', 'rindex', 'rjust',
    'rpartition', 'rsplit', 'rstrip', 'split', 'splitlines', 'startswith',
    'strip', 'swapcase', 'title', 'translate', 'upper', 'zfill',
    'isnumeric', 'isdecimal',
    # array constructors
    'array', 'asarray',
]


# NOTE(review): not referenced anywhere in the visible part of this module.
_globalvar = 0

# Dispatch decorator pre-bound so that every function decorated below is
# registered under the public module name 'numpy.char'.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy.char',
)
49def _is_unicode(arr):
50 """Returns True if arr is a string or a string array with a dtype that
51 represents a unicode string, otherwise returns False.
53 """
54 if (isinstance(arr, str) or
55 issubclass(numpy.asarray(arr).dtype.type, str)):
56 return True
57 return False
60def _to_bytes_or_str_array(result, output_dtype_like=None):
61 """
62 Helper function to cast a result back into an array
63 with the appropriate dtype if an object array must be used
64 as an intermediary.
65 """
66 ret = numpy.asarray(result.tolist())
67 dtype = getattr(output_dtype_like, 'dtype', None)
68 if dtype is not None:
69 return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False)
70 return ret
73def _clean_args(*args):
74 """
75 Helper function for delegating arguments to Python string
76 functions.
78 Many of the Python string operations that have optional arguments
79 do not use 'None' to indicate a default value. In these cases,
80 we need to remove all None arguments, and those following them.
81 """
82 newargs = []
83 for chk in args:
84 if chk is None:
85 break
86 newargs.append(chk)
87 return newargs
89def _get_num_chars(a):
90 """
91 Helper function that returns the number of characters per field in
92 a string or unicode array. This is to abstract out the fact that
93 for a unicode array this is itemsize / 4.
94 """
95 if issubclass(a.dtype.type, str_):
96 return a.itemsize // 4
97 return a.itemsize
100def _binary_op_dispatcher(x1, x2):
101 return (x1, x2)
@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test ``x1 == x2`` element-wise.

    In contrast to `numpy.equal`, trailing whitespace is stripped from
    the strings before they are compared.  This is retained for
    backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array with the comparison results.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    rstrip = True
    return compare_chararrays(x1, x2, '==', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element-wise.

    In contrast to `numpy.not_equal`, trailing whitespace is stripped
    from the strings before they are compared.  This is retained for
    backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array with the comparison results.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    rstrip = True
    return compare_chararrays(x1, x2, '!=', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element-wise.

    In contrast to `numpy.greater_equal`, trailing whitespace is
    stripped from the strings before they are compared.  This is
    retained for backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array with the comparison results.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    rstrip = True
    return compare_chararrays(x1, x2, '>=', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test ``x1 <= x2`` element-wise.

    In contrast to `numpy.less_equal`, trailing whitespace is stripped
    from the strings before they are compared.  This is retained for
    backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array with the comparison results.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    rstrip = True
    return compare_chararrays(x1, x2, '<=', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test ``x1 > x2`` element-wise.

    In contrast to `numpy.greater`, trailing whitespace is stripped
    from the strings before they are compared.  This is retained for
    backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array with the comparison results.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    rstrip = True
    return compare_chararrays(x1, x2, '>', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The final argument asks for trailing whitespace to be stripped
    # before comparing, as described above.
    return compare_chararrays(x1, x2, '<', True)
261def _unary_op_dispatcher(a):
262 return (a,)
@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute the length of every element of `a`.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Integer array with ``len`` of each element.

    See Also
    --------
    len

    Examples
    --------
    >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
    >>> np.char.str_len(a)
    array([23, 15])
    >>> a = np.array([u'\u0420', u'\u043e'])
    >>> np.char.str_len(a)
    array([1, 1])
    >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
    >>> np.char.str_len(a)
    array([[5, 5], [1, 1]])
    """
    # ``__len__`` hands back Python ints rather than C integers.  intp
    # would be the usual choice for lengths, but int is enough because
    # the dtype itemsize is itself limited.
    length_method = '__len__'
    return _vec_string(a, int_, length_method)
@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode element by element.

    `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Left-hand operands.
    x2 : array_like of str or unicode
        Right-hand operands.

    Returns
    -------
    add : ndarray
        Array of `bytes_` or `str_` (matching the inputs) with the same
        shape as `x1` and `x2`.

    Raises
    ------
    TypeError
        If the two inputs convert to arrays of different dtype classes.

    """
    left = numpy.asarray(x1)
    right = numpy.asarray(x2)
    total_chars = _get_num_chars(left) + _get_num_chars(right)
    left_kind = type(left.dtype)

    if left_kind != type(right.dtype):
        # Mixed dtypes are rejected for now; the proper solution would be
        # to implement add as a ufunc.  Mixing never worked reliably on
        # Python 3: bytes + unicode produced nonsense, unicode + bytes
        # raised, and unicode + object took the object dtype itemsize as
        # the character count (fine only for short strings).  bytes + void
        # worked, but promoting void->bytes is questionable as well.
        raise TypeError(
            "np.char.add() requires both arrays of the same dtype kind, but "
            f"got dtypes: '{left.dtype}' and '{right.dtype}' (the few cases "
            "where this used to work often lead to incorrect results).")

    return _vec_string(left, left_kind(total_chars), '__add__', (right,))
339def _multiply_dispatcher(a, i):
340 return (a,)
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return (a * i), that is string multiple concatenation,
    element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.

    Examples
    --------
    >>> a = np.array(["a", "b", "c"])
    >>> np.char.multiply(a, 3)
    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
    >>> i = np.array([1, 2, 3])
    >>> np.char.multiply(a, i)
    array(['a', 'bb', 'ccc'], dtype='<U3')
    >>> np.char.multiply(np.array(['a']), i)
    array(['a', 'aa', 'aaa'], dtype='<U3')
    >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
    >>> np.char.multiply(a, 3)
    array([['aaa', 'bbb', 'ccc'],
           ['ddd', 'eee', 'fff']], dtype='<U3')
    >>> np.char.multiply(a, i)
    array([['a', 'bb', 'ccc'],
           ['d', 'ee', 'fff']], dtype='<U3')
    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # ``initial=0`` clamps negative repeat counts to zero and also gives
    # the reduction an identity, so an empty `i` no longer raises.
    largest_repeat = int(i_arr.max(initial=0))
    out_size = _get_num_chars(a_arr) * largest_repeat
    return _vec_string(
        a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))
390def _mod_dispatcher(a, values):
391 return (a, values)
@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Apply ``a % values`` (printf-style string interpolation, i.e. the
    pre-Python 2.6 formatting mechanism) element-wise to a pair of
    array_likes of str or unicode.

    Parameters
    ----------
    a : array_like of str or unicode
        Format strings.
    values : array_like of values
        Values interpolated element-wise into the strings of `a`.

    Returns
    -------
    out : ndarray
        Array of str or unicode, depending on the input types.

    See Also
    --------
    str.__mod__

    """
    formatted = _vec_string(a, object_, '__mod__', (values,))
    return _to_bytes_or_str_array(formatted, a)
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Return a copy of `a` with only the first character of each element
    capitalized.

    Calls `str.capitalize` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'], dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'], dtype='|S4')

    """
    a_arr = numpy.asarray(a)
    # The output reuses the input dtype: capitalizing keeps the length.
    return _vec_string(a_arr, a_arr.dtype, 'capitalize')
461def _center_dispatcher(a, width, fillchar=None):
462 return (a,)
@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Center every element of `a` in a string of length `width` and
    return the result as a new array.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        Length of the resulting strings.
    fillchar : str or unicode, optional
        Character used for padding (a space by default).

    Returns
    -------
    out : ndarray
        Array of str or unicode, depending on the input types.

    See Also
    --------
    str.center

    Notes
    -----
    This function is meant for arrays of strings; the fill character
    is not applied to numeric types.

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
    array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
    >>> np.char.center(c, width=9)
    array(['   a1b2  ', '   1b2a  ', '   b2a1  ', '   2a1b  '], dtype='<U9')
    >>> np.char.center(c, width=9, fillchar='*')
    array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
    >>> np.char.center(c, width=1)
    array(['a', '1', 'b', '2'], dtype='<U1')

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The largest requested width fixes the item size of the output.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'center', (widths, fillchar))
518def _count_dispatcher(a, sub, start=None, end=None):
519 return (a,)
@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Count, for every element, the non-overlapping occurrences of the
    substring `sub` within the range given by `start` and `end`.

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode
        Substring to look for.

    start, end : int, optional
        Bounds of the search range, interpreted as in slice notation.

    Returns
    -------
    out : ndarray
        Integer array with the counts.

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])

    """
    # `end` is forwarded only when it was actually supplied.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'count', call_args)
568def _code_dispatcher(a, encoding=None, errors=None):
569 return (a,)
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Apply ``bytes.decode`` to every element of `a`.

    The available codecs are those of the Python standard library,
    which can be extended at runtime; see the :mod:`codecs` module for
    details.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
        Name of the encoding to use.

    errors : str, optional
        How encoding errors are to be handled.

    Returns
    -------
    out : ndarray

    See Also
    --------
    :py:meth:`bytes.decode`

    Notes
    -----
    The result type depends on the chosen encoding.

    Examples
    --------
    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
    >>> c
    array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...    b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
    >>> np.char.decode(c, encoding='cp037')
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    """
    # Arguments left at None (and any after them) are not forwarded.
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, object_, 'decode', codec_args)
    return _to_bytes_or_str_array(decoded)
@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Apply `str.encode` to every element of `a`.

    The available codecs are those of the Python standard library,
    which can be extended at runtime; see the codecs module for
    details.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
        Name of the encoding to use.

    errors : str, optional
        How encoding errors are to be handled.

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The result type depends on the chosen encoding.

    """
    # Arguments left at None (and any after them) are not forwarded.
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, object_, 'encode', codec_args)
    return _to_bytes_or_str_array(encoded)
654def _endswith_dispatcher(a, suffix, start=None, end=None):
655 return (a,)
@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Report, for every string element of `a`, whether it ends with
    `suffix` (`True`) or not (`False`).

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    suffix : str

    start, end : int, optional
        When `start` is given, the test begins at that position; when
        `end` is given, the comparison stops at that position.

    Returns
    -------
    out : ndarray
        Boolean array.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s[0] = 'foo'
    >>> s[1] = 'bar'
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])

    """
    # `end` is forwarded only when it was actually supplied.
    call_args = [suffix, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, bool_, 'endswith', call_args)
702def _expandtabs_dispatcher(a, tabsize=None):
703 return (a,)
@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Replace every tab character in each string element by one or more
    spaces and return the result as a new array.

    Calls `str.expandtabs` element-wise.

    How many spaces replace a tab depends on the current column and on
    `tabsize`.  The column counter restarts at zero after each newline
    found in the string.  Other non-printing characters and escape
    sequences are not interpreted.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    tabsize : int, optional
        Number of spaces per tab stop; 8 when not given.

    Returns
    -------
    out : ndarray
        Array of str or unicode, depending on the input type.

    See Also
    --------
    str.expandtabs

    """
    expanded = _vec_string(a, object_, 'expandtabs', (tabsize,))
    return _to_bytes_or_str_array(expanded, a)
@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    Locate, for every element, the lowest index at which the substring
    `sub` occurs, with `sub` contained in the range given by `start`
    and `end`.

    Calls `str.find` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Bounds of the search range, interpreted as in slice notation.

    Returns
    -------
    out : ndarray or int
        Integer array with the indices; -1 where `sub` is not found.

    See Also
    --------
    str.find

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.char.find(a, "Python", start=0, end=None)
    array([11])

    """
    # `end` is forwarded only when it was actually supplied.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'find', call_args)
@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.

    Raises
    ------
    ValueError
        If `sub` is not found in one of the elements.

    See Also
    --------
    find, str.index

    Examples
    --------
    >>> a = np.array(["Computer Science"])
    >>> np.char.index(a, "Science", start=0, end=None)
    array([9])

    """
    # `end` is only forwarded when it was actually supplied.
    return _vec_string(
        a, int_, 'index', [sub, start] + _clean_args(end))
@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalnum
    """
    return _vec_string(a, bool_, 'isalnum')
@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Test each element for being non-empty and made up of alphabetic
    characters only.

    Calls `str.isalpha` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Boolean array with the test results.

    See Also
    --------
    str.isalpha
    """
    method_name = 'isalpha'
    return _vec_string(a, bool_, method_name)
@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Test each element for being non-empty and made up of digits only.

    Calls `str.isdigit` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Boolean array with the test results.

    See Also
    --------
    str.isdigit

    Examples
    --------
    >>> a = np.array(['a', 'b', '0'])
    >>> np.char.isdigit(a)
    array([False, False,  True])
    >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
    >>> np.char.isdigit(a)
    array([[False, False,  True], [False,  True,  True]])
    """
    method_name = 'isdigit'
    return _vec_string(a, bool_, method_name)
@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Test each element for containing at least one cased character and
    having all of its cased characters in lowercase.

    Calls `str.islower` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Boolean array with the test results.

    See Also
    --------
    str.islower
    """
    method_name = 'islower'
    return _vec_string(a, bool_, method_name)
@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Test each element for being non-empty and made up of whitespace
    characters only.

    Calls `str.isspace` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Boolean array with the test results.

    See Also
    --------
    str.isspace
    """
    method_name = 'isspace'
    return _vec_string(a, bool_, method_name)
@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Test each element for being a non-empty titlecased string.

    Calls `str.istitle` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Boolean array with the test results.

    See Also
    --------
    str.istitle
    """
    method_name = 'istitle'
    return _vec_string(a, bool_, method_name)
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Test each element for being non-empty and having all of its cased
    characters in uppercase.

    Calls `str.isupper` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Boolean array with the test results.

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> str = "GHC"
    >>> np.char.isupper(str)
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.char.isupper(a)
    array([False,  True, False])

    """
    method_name = 'isupper'
    return _vec_string(a, bool_, method_name)
1023def _join_dispatcher(sep, seq):
1024 return (sep, seq)
@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Concatenate the strings in the sequence `seq`, placing `sep`
    between them.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Array of str or unicode, depending on the input types.

    See Also
    --------
    str.join

    Examples
    --------
    >>> np.char.join('-', 'osd')
    array('o-s-d', dtype='<U5')

    >>> np.char.join(['-', '.'], ['ghc', 'osd'])
    array(['g-h-c', 'o.s.d'], dtype='<U5')

    """
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_bytes_or_str_array(joined, seq)
1063def _just_dispatcher(a, width, fillchar=None):
1064 return (a,)
@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Left-justify every element of `a` in a string of length `width`
    and return the result as a new array.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        Length of the resulting strings.
    fillchar : str or unicode, optional
        Character used for padding.

    Returns
    -------
    out : ndarray
        Array of str or unicode, depending on the input type.

    See Also
    --------
    str.ljust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The largest requested width fixes the item size of the output.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'ljust', (widths, fillchar))
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert every element of `a` to lowercase.

    Calls `str.lower` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Array of str or unicode, depending on the input type.

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')

    """
    strings = numpy.asarray(a)
    # The output reuses the dtype of the converted input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lower')
1138def _strip_dispatcher(a, chars=None):
1139 return (a,)
@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    Remove leading characters from every element of `a` and return the
    result as a new array.

    Calls `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.

    chars : {str, unicode}, optional
        String giving the set of characters to remove.  When omitted
        or None, whitespace is removed.  `chars` is not treated as a
        prefix: every combination of its characters is stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Array of str or unicode, depending on the input type.

    See Also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    The 'a' in c[1] is not stripped because it is preceded by
    whitespace.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', '  aA  ', 'bBABba'], dtype='<U7')


    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
    ... # XXX: is this a regression? This used to return True
    ... # np.char.lstrip(c,'') does not modify c at all.
    False
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True

    """
    strings = numpy.asarray(a)
    # The output reuses the dtype of the converted input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lstrip', (chars,))
1197def _partition_dispatcher(a, sep):
1198 return (a,)
@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Split every element of `a` around `sep`.

    Calls `str.partition` element-wise.

    Each element is split at the first occurrence of `sep` into three
    strings: the part before the separator, the separator itself and
    the part after it.  When the separator does not occur, the three
    strings are the element itself followed by two empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array
    sep : {str, unicode}
        Separator at which each string element of `a` is split.

    Returns
    -------
    out : ndarray, {str, unicode}
        Array of str or unicode, depending on the input type.  It has
        one additional dimension holding the 3 parts of every input
        element.

    See Also
    --------
    str.partition

    """
    parts = _vec_string(a, object_, 'partition', (sep,))
    return _to_bytes_or_str_array(parts, a)
1237def _replace_dispatcher(a, old, new, count=None):
1238 return (a,)
@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    Replace every occurrence of the substring `old` by `new` in each
    element of `a` and return the result as a new array.

    Calls `str.replace` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    old, new : str or unicode

    count : int, optional
        When given, only the first `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Array of str or unicode, depending on the input type.

    See Also
    --------
    str.replace

    Examples
    --------
    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
    >>> np.char.replace(a, 'mango', 'banana')
    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')

    >>> a = np.array(["The dish is fresh", "This is it"])
    >>> np.char.replace(a, 'is', 'was')
    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
    """
    # `count` is forwarded only when it was actually supplied.
    call_args = [old, new]
    call_args.extend(_clean_args(count))
    replaced = _vec_string(a, object_, 'replace', call_args)
    return _to_bytes_or_str_array(replaced, a)
@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    Locate, for every element of `a`, the highest index at which the
    substring `sub` occurs, with `sub` contained within the range
    given by `start` and `end`.

    Calls `str.rfind` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Bounds of the search range, interpreted as in slice notation.

    Returns
    -------
    out : ndarray
        Integer array with the indices; -1 on failure.

    See Also
    --------
    str.rfind

    """
    # `end` is forwarded only when it was actually supplied.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'rfind', call_args)
@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Same as `rfind`, except that `ValueError` is raised when the
    substring `sub` is not found.

    Calls `str.rindex` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    rfind, str.rindex

    """
    # `end` is only forwarded when it was actually supplied.
    method_args = [sub, start] + _clean_args(end)
    return _vec_string(a, int_, 'rindex', method_args)
@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` right-justified in a
    string of length `width`.

    Calls `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.
        Elements that are already longer than their `width` are
        returned unchanged, as `str.rjust` does; they are not truncated.

    See Also
    --------
    str.rjust

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    size = int(numpy.max(width_arr.flat))
    if a_arr.size:
        # `str.rjust` hands back the element untouched when it is longer
        # than `width`.  The output itemsize therefore has to hold the
        # longest input element too, otherwise that element would be cut
        # off when it is stored in the result array.
        size = max(size, int(numpy.max(str_len(a_arr))))
    if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
        # bytes.rjust requires a bytes fill character.
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, type(a_arr.dtype)(size), 'rjust', (width_arr, fillchar))
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    Every element of `a` is split at the last occurrence of `sep`,
    giving 3 strings: the part before the separator, the separator
    itself, and the part after the separator. If the separator is not
    found, return 3 strings containing the string itself, followed by
    two empty strings.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type. The output array will have an extra dimension with
        3 elements per input element.

    See Also
    --------
    str.rpartition

    """
    # Compute into an object array first, then convert the result to a
    # bytes/str array based on the original input.
    pieces = _vec_string(a, object_, 'rpartition', (sep,))
    return _to_bytes_or_str_array(pieces, a)
1417def _split_dispatcher(a, sep=None, maxsplit=None):
1418 return (a,)
@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as
    the delimiter string.

    Calls `str.rsplit` element-wise.

    Except for splitting from the right, `rsplit`
    behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.rsplit, split

    """
    # The per-element lists can differ in length, so the result is kept
    # as an object array.
    method_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'rsplit', method_args)
1459def _strip_dispatcher(a, chars=None):
1460 return (a,)
@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    Return a copy of every element of `a` with the trailing
    characters removed.

    Calls `str.rstrip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        The `chars` argument is a string specifying the set of
        characters to be removed. If omitted or None, the `chars`
        argument defaults to removing whitespace. The `chars` argument
        is not a suffix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array([b'aAaAaA', b'abBABba'], dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array([b'aAaAaA', b'abBABb'], dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array([b'aAaAa', b'abBABba'], dtype='|S7')

    """
    arr = numpy.asarray(a)
    # The result reuses the input dtype.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'rstrip', (chars,))
@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as
    the delimiter string.

    Calls `str.split` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string is a
        separator.

    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.split, rsplit

    """
    # The per-element lists can differ in length, so the result is kept
    # as an object array.
    method_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'split', method_args)
1543def _splitlines_dispatcher(a, keepends=None):
1544 return (a,)
@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    Split every element of `a` into a list of its lines, breaking at
    line boundaries.

    Calls `str.splitlines` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are not included in the resulting list unless
        keepends is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    """
    # `keepends` is only forwarded when it was actually supplied.
    method_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', method_args)
1577def _startswith_dispatcher(a, prefix, start=None, end=None):
1578 return (a,)
@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element
    in `a` starts with `prefix`, and `False` elsewhere.

    Calls `str.startswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See Also
    --------
    str.startswith

    """
    # `end` is only forwarded when it was actually supplied.
    method_args = [prefix, start] + _clean_args(end)
    return _vec_string(a, bool_, 'startswith', method_args)
@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    Return a copy of every element of `a` with the leading and
    trailing characters removed.

    Calls `str.strip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        The `chars` argument is a string specifying the set of
        characters to be removed. If omitted or None, the `chars`
        argument defaults to removing whitespace. The `chars` argument
        is not a prefix or suffix; rather, all combinations of its
        values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')

    """
    arr = numpy.asarray(a)
    # `chars` is only forwarded when it was actually supplied; the result
    # reuses the input dtype.
    method_args = _clean_args(chars)
    return _vec_string(arr, arr.dtype, 'strip', method_args)
@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return element-wise a copy of the string with
    uppercase characters converted to lowercase and vice versa.

    Calls `str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array([b'a1B c', b'1b Ca', b'b Ca1', b'cA1b'], dtype='|S5')
    >>> np.char.swapcase(c)
    array([b'A1b C', b'1B cA', b'B cA1', b'Ca1B'], dtype='|S5')

    """
    arr = numpy.asarray(a)
    # The result reuses the input dtype.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'swapcase')
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return element-wise title cased version of string or unicode.

    Title case words start with uppercase characters, all remaining cased
    characters are lowercase.

    Calls `str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array([b'a1b c', b'1b ca', b'b ca1', b'ca1b'], dtype='|S5')
    >>> np.char.title(c)
    array([b'A1B C', b'1B Ca', b'B Ca1', b'Ca1B'], dtype='|S5')

    """
    arr = numpy.asarray(a)
    # The result reuses the input dtype.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'title')
1736def _translate_dispatcher(a, table, deletechars=None):
1737 return (a,)
@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Return a copy of every element of `a` where all characters
    occurring in the optional argument `deletechars` are removed, and
    the remaining characters have been mapped through the given
    translation table.

    Calls `str.translate` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str
        Only forwarded for non-unicode input; for arrays whose dtype
        is a `str_` subtype this argument is ignored.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    """
    arr = numpy.asarray(a)
    if issubclass(arr.dtype.type, str_):
        # Unicode elements: only the table is passed on.
        method_args = (table,)
    else:
        # `deletechars` is only forwarded when it was actually supplied.
        method_args = [table] + _clean_args(deletechars)
    return _vec_string(arr, arr.dtype, 'translate', method_args)
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Return an array with the elements converted to uppercase.

    Calls `str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    arr = numpy.asarray(a)
    # The result reuses the input dtype.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'upper')
1812def _zfill_dispatcher(a, width):
1813 return (a,)
@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros

    Calls `str.zfill` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        Elements that are already longer than their `width` are
        returned unchanged, as `str.zfill` does; they are not truncated.

    See Also
    --------
    str.zfill

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    size = int(numpy.max(width_arr.flat))
    if a_arr.size:
        # `str.zfill` hands back the element untouched when it is longer
        # than `width`.  The output itemsize therefore has to hold the
        # longest input element too, otherwise that element would be cut
        # off when it is stored in the result array.
        size = max(size, int(numpy.max(str_len(a_arr))))
    return _vec_string(
        a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,))
@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    Test, element by element, whether the string consists only of
    numeric characters.

    Calls `str.isnumeric` element-wise.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `a` is not recognised as unicode by `_is_unicode`.

    See Also
    --------
    str.isnumeric

    Examples
    --------
    >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
    array([ True, False, False, False, False])

    """
    if _is_unicode(a):
        return _vec_string(a, bool_, 'isnumeric')
    raise TypeError("isnumeric is only available for Unicode strings and arrays")
@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    Test, element by element, whether the string consists only of
    decimal characters.

    Calls `str.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `a` is not recognised as unicode by `_is_unicode`.

    See Also
    --------
    str.isdecimal

    Examples
    --------
    >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
    array([ True, False, False, False])

    """
    if _is_unicode(a):
        return _vec_string(a, bool_, 'isdecimal')
    raise TypeError(
        "isdecimal is only available for Unicode strings and arrays")
1922@set_module('numpy')
1923class chararray(ndarray):
1924 """
1925 chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
1926 strides=None, order=None)
1928 Provides a convenient view on arrays of string and unicode values.
1930 .. note::
1931 The `chararray` class exists for backwards compatibility with
1932 Numarray, it is not recommended for new development. Starting from numpy
1933 1.4, if one needs arrays of strings, it is recommended to use arrays of
1934 `dtype` `object_`, `bytes_` or `str_`, and use the free functions
1935 in the `numpy.char` module for fast vectorized string operations.
1937 Versus a regular NumPy array of type `str` or `unicode`, this
1938 class adds the following functionality:
1940 1) values automatically have whitespace removed from the end
1941 when indexed
1943 2) comparison operators automatically remove whitespace from the
1944 end when comparing values
1946 3) vectorized string operations are provided as methods
1947 (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
1949 chararrays should be created using `numpy.char.array` or
1950 `numpy.char.asarray`, rather than this constructor directly.
1952 This constructor creates the array, using `buffer` (with `offset`
1953 and `strides`) if it is not ``None``. If `buffer` is ``None``, then
1954 constructs a new array with `strides` in "C order", unless both
1955 ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
1956 is in "Fortran order".
1958 Methods
1959 -------
1960 astype
1961 argsort
1962 copy
1963 count
1964 decode
1965 dump
1966 dumps
1967 encode
1968 endswith
1969 expandtabs
1970 fill
1971 find
1972 flatten
1973 getfield
1974 index
1975 isalnum
1976 isalpha
1977 isdecimal
1978 isdigit
1979 islower
1980 isnumeric
1981 isspace
1982 istitle
1983 isupper
1984 item
1985 join
1986 ljust
1987 lower
1988 lstrip
1989 nonzero
1990 put
1991 ravel
1992 repeat
1993 replace
1994 reshape
1995 resize
1996 rfind
1997 rindex
1998 rjust
1999 rsplit
2000 rstrip
2001 searchsorted
2002 setfield
2003 setflags
2004 sort
2005 split
2006 splitlines
2007 squeeze
2008 startswith
2009 strip
2010 swapaxes
2011 swapcase
2012 take
2013 title
2014 tofile
2015 tolist
2016 tostring
2017 translate
2018 transpose
2019 upper
2020 view
2021 zfill
2023 Parameters
2024 ----------
2025 shape : tuple
2026 Shape of the array.
2027 itemsize : int, optional
2028 Length of each array element, in number of characters. Default is 1.
2029 unicode : bool, optional
2030 Are the array elements of type unicode (True) or string (False).
2031 Default is False.
2032 buffer : object exposing the buffer interface or str, optional
2033 Memory address of the start of the array data. Default is None,
2034 in which case a new array is created.
2035 offset : int, optional
2036 Fixed stride displacement from the beginning of an axis?
2037 Default is 0. Needs to be >=0.
2038 strides : array_like of ints, optional
2039 Strides for the array (see `ndarray.strides` for full description).
2040 Default is None.
2041 order : {'C', 'F'}, optional
2042 The order in which the array data is stored in memory: 'C' ->
2043 "row major" order (the default), 'F' -> "column major"
2044 (Fortran) order.
2046 Examples
2047 --------
2048 >>> charar = np.chararray((3, 3))
2049 >>> charar[:] = 'a'
2050 >>> charar
2051 chararray([[b'a', b'a', b'a'],
2052 [b'a', b'a', b'a'],
2053 [b'a', b'a', b'a']], dtype='|S1')
2055 >>> charar = np.chararray(charar.shape, itemsize=5)
2056 >>> charar[:] = 'abc'
2057 >>> charar
2058 chararray([[b'abc', b'abc', b'abc'],
2059 [b'abc', b'abc', b'abc'],
2060 [b'abc', b'abc', b'abc']], dtype='|S5')
2062 """
2063 def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
2064 offset=0, strides=None, order='C'):
2065 global _globalvar
2067 if unicode:
2068 dtype = str_
2069 else:
2070 dtype = bytes_
2072 # force itemsize to be a Python int, since using NumPy integer
2073 # types results in itemsize.itemsize being used as the size of
2074 # strings in the new array.
2075 itemsize = int(itemsize)
2077 if isinstance(buffer, str):
2078 # unicode objects do not have the buffer interface
2079 filler = buffer
2080 buffer = None
2081 else:
2082 filler = None
2084 _globalvar = 1
2085 if buffer is None:
2086 self = ndarray.__new__(subtype, shape, (dtype, itemsize),
2087 order=order)
2088 else:
2089 self = ndarray.__new__(subtype, shape, (dtype, itemsize),
2090 buffer=buffer,
2091 offset=offset, strides=strides,
2092 order=order)
2093 if filler is not None:
2094 self[...] = filler
2095 _globalvar = 0
2096 return self
2098 def __array_finalize__(self, obj):
2099 # The b is a special case because it is used for reconstructing.
2100 if not _globalvar and self.dtype.char not in 'SUbc':
2101 raise ValueError("Can only create a chararray from string data.")
2103 def __getitem__(self, obj):
2104 val = ndarray.__getitem__(self, obj)
2106 if isinstance(val, character):
2107 temp = val.rstrip()
2108 if len(temp) == 0:
2109 val = ''
2110 else:
2111 val = temp
2113 return val
2115 # IMPLEMENTATION NOTE: Most of the methods of this class are
2116 # direct delegations to the free functions in this module.
2117 # However, those that return an array of strings should instead
2118 # return a chararray, so some extra wrapping is required.
2120 def __eq__(self, other):
2121 """
2122 Return (self == other) element-wise.
2124 See Also
2125 --------
2126 equal
2127 """
2128 return equal(self, other)
2130 def __ne__(self, other):
2131 """
2132 Return (self != other) element-wise.
2134 See Also
2135 --------
2136 not_equal
2137 """
2138 return not_equal(self, other)
2140 def __ge__(self, other):
2141 """
2142 Return (self >= other) element-wise.
2144 See Also
2145 --------
2146 greater_equal
2147 """
2148 return greater_equal(self, other)
2150 def __le__(self, other):
2151 """
2152 Return (self <= other) element-wise.
2154 See Also
2155 --------
2156 less_equal
2157 """
2158 return less_equal(self, other)
2160 def __gt__(self, other):
2161 """
2162 Return (self > other) element-wise.
2164 See Also
2165 --------
2166 greater
2167 """
2168 return greater(self, other)
2170 def __lt__(self, other):
2171 """
2172 Return (self < other) element-wise.
2174 See Also
2175 --------
2176 less
2177 """
2178 return less(self, other)
2180 def __add__(self, other):
2181 """
2182 Return (self + other), that is string concatenation,
2183 element-wise for a pair of array_likes of str or unicode.
2185 See Also
2186 --------
2187 add
2188 """
2189 return asarray(add(self, other))
2191 def __radd__(self, other):
2192 """
2193 Return (other + self), that is string concatenation,
2194 element-wise for a pair of array_likes of `bytes_` or `str_`.
2196 See Also
2197 --------
2198 add
2199 """
2200 return asarray(add(numpy.asarray(other), self))
2202 def __mul__(self, i):
2203 """
2204 Return (self * i), that is string multiple concatenation,
2205 element-wise.
2207 See Also
2208 --------
2209 multiply
2210 """
2211 return asarray(multiply(self, i))
2213 def __rmul__(self, i):
2214 """
2215 Return (self * i), that is string multiple concatenation,
2216 element-wise.
2218 See Also
2219 --------
2220 multiply
2221 """
2222 return asarray(multiply(self, i))
2224 def __mod__(self, i):
2225 """
2226 Return (self % i), that is pre-Python 2.6 string formatting
2227 (interpolation), element-wise for a pair of array_likes of `bytes_`
2228 or `str_`.
2230 See Also
2231 --------
2232 mod
2233 """
2234 return asarray(mod(self, i))
2236 def __rmod__(self, other):
2237 return NotImplemented
2239 def argsort(self, axis=-1, kind=None, order=None):
2240 """
2241 Return the indices that sort the array lexicographically.
2243 For full documentation see `numpy.argsort`, for which this method is
2244 in fact merely a "thin wrapper."
2246 Examples
2247 --------
2248 >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
2249 >>> c = c.view(np.chararray); c
2250 chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
2251 dtype='|S5')
2252 >>> c[c.argsort()]
2253 chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
2254 dtype='|S5')
2256 """
2257 return self.__array__().argsort(axis, kind, order)
2258 argsort.__doc__ = ndarray.argsort.__doc__
2260 def capitalize(self):
2261 """
2262 Return a copy of `self` with only the first character of each element
2263 capitalized.
2265 See Also
2266 --------
2267 char.capitalize
2269 """
2270 return asarray(capitalize(self))
2272 def center(self, width, fillchar=' '):
2273 """
2274 Return a copy of `self` with its elements centered in a
2275 string of length `width`.
2277 See Also
2278 --------
2279 center
2280 """
2281 return asarray(center(self, width, fillchar))
2283 def count(self, sub, start=0, end=None):
2284 """
2285 Returns an array with the number of non-overlapping occurrences of
2286 substring `sub` in the range [`start`, `end`].
2288 See Also
2289 --------
2290 char.count
2292 """
2293 return count(self, sub, start, end)
2295 def decode(self, encoding=None, errors=None):
2296 """
2297 Calls ``bytes.decode`` element-wise.
2299 See Also
2300 --------
2301 char.decode
2303 """
2304 return decode(self, encoding, errors)
2306 def encode(self, encoding=None, errors=None):
2307 """
2308 Calls `str.encode` element-wise.
2310 See Also
2311 --------
2312 char.encode
2314 """
2315 return encode(self, encoding, errors)
2317 def endswith(self, suffix, start=0, end=None):
2318 """
2319 Returns a boolean array which is `True` where the string element
2320 in `self` ends with `suffix`, otherwise `False`.
2322 See Also
2323 --------
2324 char.endswith
2326 """
2327 return endswith(self, suffix, start, end)
2329 def expandtabs(self, tabsize=8):
2330 """
2331 Return a copy of each string element where all tab characters are
2332 replaced by one or more spaces.
2334 See Also
2335 --------
2336 char.expandtabs
2338 """
2339 return asarray(expandtabs(self, tabsize))
2341 def find(self, sub, start=0, end=None):
2342 """
2343 For each element, return the lowest index in the string where
2344 substring `sub` is found.
2346 See Also
2347 --------
2348 char.find
2350 """
2351 return find(self, sub, start, end)
2353 def index(self, sub, start=0, end=None):
2354 """
2355 Like `find`, but raises `ValueError` when the substring is not found.
2357 See Also
2358 --------
2359 char.index
2361 """
2362 return index(self, sub, start, end)
2364 def isalnum(self):
2365 """
2366 Returns true for each element if all characters in the string
2367 are alphanumeric and there is at least one character, false
2368 otherwise.
2370 See Also
2371 --------
2372 char.isalnum
2374 """
2375 return isalnum(self)
2377 def isalpha(self):
2378 """
2379 Returns true for each element if all characters in the string
2380 are alphabetic and there is at least one character, false
2381 otherwise.
2383 See Also
2384 --------
2385 char.isalpha
2387 """
2388 return isalpha(self)
2390 def isdigit(self):
2391 """
2392 Returns true for each element if all characters in the string are
2393 digits and there is at least one character, false otherwise.
2395 See Also
2396 --------
2397 char.isdigit
2399 """
2400 return isdigit(self)
2402 def islower(self):
2403 """
2404 Returns true for each element if all cased characters in the
2405 string are lowercase and there is at least one cased character,
2406 false otherwise.
2408 See Also
2409 --------
2410 char.islower
2412 """
2413 return islower(self)
2415 def isspace(self):
2416 """
2417 Returns true for each element if there are only whitespace
2418 characters in the string and there is at least one character,
2419 false otherwise.
2421 See Also
2422 --------
2423 char.isspace
2425 """
2426 return isspace(self)
2428 def istitle(self):
2429 """
2430 Returns true for each element if the element is a titlecased
2431 string and there is at least one character, false otherwise.
2433 See Also
2434 --------
2435 char.istitle
2437 """
2438 return istitle(self)
2440 def isupper(self):
2441 """
2442 Returns true for each element if all cased characters in the
2443 string are uppercase and there is at least one character, false
2444 otherwise.
2446 See Also
2447 --------
2448 char.isupper
2450 """
2451 return isupper(self)
2453 def join(self, seq):
2454 """
2455 Return a string which is the concatenation of the strings in the
2456 sequence `seq`.
2458 See Also
2459 --------
2460 char.join
2462 """
2463 return join(self, seq)
2465 def ljust(self, width, fillchar=' '):
2466 """
2467 Return an array with the elements of `self` left-justified in a
2468 string of length `width`.
2470 See Also
2471 --------
2472 char.ljust
2474 """
2475 return asarray(ljust(self, width, fillchar))
2477 def lower(self):
2478 """
2479 Return an array with the elements of `self` converted to
2480 lowercase.
2482 See Also
2483 --------
2484 char.lower
2486 """
2487 return asarray(lower(self))
2489 def lstrip(self, chars=None):
2490 """
2491 For each element in `self`, return a copy with the leading characters
2492 removed.
2494 See Also
2495 --------
2496 char.lstrip
2498 """
2499 return asarray(lstrip(self, chars))
2501 def partition(self, sep):
2502 """
2503 Partition each element in `self` around `sep`.
2505 See Also
2506 --------
2507 partition
2508 """
2509 return asarray(partition(self, sep))
2511 def replace(self, old, new, count=None):
2512 """
2513 For each element in `self`, return a copy of the string with all
2514 occurrences of substring `old` replaced by `new`.
2516 See Also
2517 --------
2518 char.replace
2520 """
2521 return asarray(replace(self, old, new, count))
2523 def rfind(self, sub, start=0, end=None):
2524 """
2525 For each element in `self`, return the highest index in the string
2526 where substring `sub` is found, such that `sub` is contained
2527 within [`start`, `end`].
2529 See Also
2530 --------
2531 char.rfind
2533 """
2534 return rfind(self, sub, start, end)
2536 def rindex(self, sub, start=0, end=None):
2537 """
2538 Like `rfind`, but raises `ValueError` when the substring `sub` is
2539 not found.
2541 See Also
2542 --------
2543 char.rindex
2545 """
2546 return rindex(self, sub, start, end)
2548 def rjust(self, width, fillchar=' '):
2549 """
2550 Return an array with the elements of `self`
2551 right-justified in a string of length `width`.
2553 See Also
2554 --------
2555 char.rjust
2557 """
2558 return asarray(rjust(self, width, fillchar))
2560 def rpartition(self, sep):
2561 """
2562 Partition each element in `self` around `sep`.
2564 See Also
2565 --------
2566 rpartition
2567 """
2568 return asarray(rpartition(self, sep))
2570 def rsplit(self, sep=None, maxsplit=None):
2571 """
2572 For each element in `self`, return a list of the words in
2573 the string, using `sep` as the delimiter string.
2575 See Also
2576 --------
2577 char.rsplit
2579 """
2580 return rsplit(self, sep, maxsplit)
2582 def rstrip(self, chars=None):
2583 """
2584 For each element in `self`, return a copy with the trailing
2585 characters removed.
2587 See Also
2588 --------
2589 char.rstrip
2591 """
2592 return asarray(rstrip(self, chars))
2594 def split(self, sep=None, maxsplit=None):
2595 """
2596 For each element in `self`, return a list of the words in the
2597 string, using `sep` as the delimiter string.
2599 See Also
2600 --------
2601 char.split
2603 """
2604 return split(self, sep, maxsplit)
2606 def splitlines(self, keepends=None):
2607 """
2608 For each element in `self`, return a list of the lines in the
2609 element, breaking at line boundaries.
2611 See Also
2612 --------
2613 char.splitlines
2615 """
2616 return splitlines(self, keepends)
2618 def startswith(self, prefix, start=0, end=None):
2619 """
2620 Returns a boolean array which is `True` where the string element
2621 in `self` starts with `prefix`, otherwise `False`.
2623 See Also
2624 --------
2625 char.startswith
2627 """
2628 return startswith(self, prefix, start, end)
2630 def strip(self, chars=None):
2631 """
2632 For each element in `self`, return a copy with the leading and
2633 trailing characters removed.
2635 See Also
2636 --------
2637 char.strip
2639 """
2640 return asarray(strip(self, chars))
2642 def swapcase(self):
2643 """
2644 For each element in `self`, return a copy of the string with
2645 uppercase characters converted to lowercase and vice versa.
2647 See Also
2648 --------
2649 char.swapcase
2651 """
2652 return asarray(swapcase(self))
2654 def title(self):
2655 """
2656 For each element in `self`, return a titlecased version of the
2657 string: words start with uppercase characters, all remaining cased
2658 characters are lowercase.
2660 See Also
2661 --------
2662 char.title
2664 """
2665 return asarray(title(self))
def translate(self, table, deletechars=None):
    """
    Return, for every element of `self`, a copy of the string in which
    all characters occurring in the optional argument `deletechars`
    are removed and the remaining characters have been mapped through
    the given translation table.

    The module-level `translate` function does the work; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.translate

    """
    translated = translate(self, table, deletechars)
    return asarray(translated)
def upper(self):
    """
    Return an array whose elements are those of `self` converted to
    uppercase.

    The module-level `upper` function does the work; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.upper

    """
    uppercased = upper(self)
    return asarray(uppercased)
def zfill(self, width):
    """
    Return the numeric string left-filled with zeros in a string of
    length `width`.

    The module-level `zfill` function does the work; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.zfill

    """
    padded = zfill(self, width)
    return asarray(padded)
def isnumeric(self):
    """
    Return, for every element of `self`, True if the element contains
    only numeric characters.

    `self` is forwarded to the module-level `isnumeric` function and
    its result is returned as is.

    See Also
    --------
    char.isnumeric

    """
    flags = isnumeric(self)
    return flags
def isdecimal(self):
    """
    Return, for every element of `self`, True if the element contains
    only decimal characters.

    `self` is forwarded to the module-level `isdecimal` function and
    its result is returned as is.

    See Also
    --------
    char.isdecimal

    """
    flags = isdecimal(self)
    return flags
2730@set_module("numpy.char")
2731def array(obj, itemsize=None, copy=True, unicode=None, order=None):
2732 """
2733 Create a `chararray`.
2735 .. note::
2736 This class is provided for numarray backward-compatibility.
2737 New code (not concerned with numarray compatibility) should use
2738 arrays of type `bytes_` or `str_` and use the free functions
2739 in :mod:`numpy.char <numpy.core.defchararray>` for fast
2740 vectorized string operations instead.
2742 Versus a regular NumPy array of type `str` or `unicode`, this
2743 class adds the following functionality:
2745 1) values automatically have whitespace removed from the end
2746 when indexed
2748 2) comparison operators automatically remove whitespace from the
2749 end when comparing values
2751 3) vectorized string operations are provided as methods
2752 (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)
2754 Parameters
2755 ----------
2756 obj : array of str or unicode-like
2758 itemsize : int, optional
2759 `itemsize` is the number of characters per scalar in the
2760 resulting array. If `itemsize` is None, and `obj` is an
2761 object array or a Python list, the `itemsize` will be
2762 automatically determined. If `itemsize` is provided and `obj`
2763 is of type str or unicode, then the `obj` string will be
2764 chunked into `itemsize` pieces.
2766 copy : bool, optional
2767 If true (default), then the object is copied. Otherwise, a copy
2768 will only be made if __array__ returns a copy, if obj is a
2769 nested sequence, or if a copy is needed to satisfy any of the other
2770 requirements (`itemsize`, unicode, `order`, etc.).
2772 unicode : bool, optional
2773 When true, the resulting `chararray` can contain Unicode
2774 characters, when false only 8-bit characters. If unicode is
2775 None and `obj` is one of the following:
2777 - a `chararray`,
2778 - an ndarray of type `str` or `unicode`
2779 - a Python str or unicode object,
2781 then the unicode setting of the output array will be
2782 automatically determined.
2784 order : {'C', 'F', 'A'}, optional
2785 Specify the order of the array. If order is 'C' (default), then the
2786 array will be in C-contiguous order (last-index varies the
2787 fastest). If order is 'F', then the returned array
2788 will be in Fortran-contiguous order (first-index varies the
2789 fastest). If order is 'A', then the returned array may
2790 be in any order (either C-, Fortran-contiguous, or even
2791 discontiguous).
2792 """
2793 if isinstance(obj, (bytes, str)):
2794 if unicode is None:
2795 if isinstance(obj, str):
2796 unicode = True
2797 else:
2798 unicode = False
2800 if itemsize is None:
2801 itemsize = len(obj)
2802 shape = len(obj) // itemsize
2804 return chararray(shape, itemsize=itemsize, unicode=unicode,
2805 buffer=obj, order=order)
2807 if isinstance(obj, (list, tuple)):
2808 obj = numpy.asarray(obj)
2810 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
2811 # If we just have a vanilla chararray, create a chararray
2812 # view around it.
2813 if not isinstance(obj, chararray):
2814 obj = obj.view(chararray)
2816 if itemsize is None:
2817 itemsize = obj.itemsize
2818 # itemsize is in 8-bit chars, so for Unicode, we need
2819 # to divide by the size of a single Unicode character,
2820 # which for NumPy is always 4
2821 if issubclass(obj.dtype.type, str_):
2822 itemsize //= 4
2824 if unicode is None:
2825 if issubclass(obj.dtype.type, str_):
2826 unicode = True
2827 else:
2828 unicode = False
2830 if unicode:
2831 dtype = str_
2832 else:
2833 dtype = bytes_
2835 if order is not None:
2836 obj = numpy.asarray(obj, order=order)
2837 if (copy or
2838 (itemsize != obj.itemsize) or
2839 (not unicode and isinstance(obj, str_)) or
2840 (unicode and isinstance(obj, bytes_))):
2841 obj = obj.astype((dtype, int(itemsize)))
2842 return obj
2844 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
2845 if itemsize is None:
2846 # Since no itemsize was specified, convert the input array to
2847 # a list so the ndarray constructor will automatically
2848 # determine the itemsize for us.
2849 obj = obj.tolist()
2850 # Fall through to the default case
2852 if unicode:
2853 dtype = str_
2854 else:
2855 dtype = bytes_
2857 if itemsize is None:
2858 val = narray(obj, dtype=dtype, order=order, subok=True)
2859 else:
2860 val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
2861 return val.view(chararray)
2864@set_module("numpy.char")
2865def asarray(obj, itemsize=None, unicode=None, order=None):
2866 """
2867 Convert the input to a `chararray`, copying the data only if
2868 necessary.
2870 Versus a regular NumPy array of type `str` or `unicode`, this
2871 class adds the following functionality:
2873 1) values automatically have whitespace removed from the end
2874 when indexed
2876 2) comparison operators automatically remove whitespace from the
2877 end when comparing values
2879 3) vectorized string operations are provided as methods
2880 (e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,``%``)
2882 Parameters
2883 ----------
2884 obj : array of str or unicode-like
2886 itemsize : int, optional
2887 `itemsize` is the number of characters per scalar in the
2888 resulting array. If `itemsize` is None, and `obj` is an
2889 object array or a Python list, the `itemsize` will be
2890 automatically determined. If `itemsize` is provided and `obj`
2891 is of type str or unicode, then the `obj` string will be
2892 chunked into `itemsize` pieces.
2894 unicode : bool, optional
2895 When true, the resulting `chararray` can contain Unicode
2896 characters, when false only 8-bit characters. If unicode is
2897 None and `obj` is one of the following:
2899 - a `chararray`,
2900 - an ndarray of type `str` or 'unicode`
2901 - a Python str or unicode object,
2903 then the unicode setting of the output array will be
2904 automatically determined.
2906 order : {'C', 'F'}, optional
2907 Specify the order of the array. If order is 'C' (default), then the
2908 array will be in C-contiguous order (last-index varies the
2909 fastest). If order is 'F', then the returned array
2910 will be in Fortran-contiguous order (first-index varies the
2911 fastest).
2912 """
2913 return array(obj, itemsize, copy=False,
2914 unicode=unicode, order=order)