Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/core/defchararray.py: 44%

1"""

This module contains a set of functions for vectorized string
operations and methods.

.. note::
   The `chararray` class exists for backwards compatibility with
   Numarray; it is not recommended for new development. Starting from numpy
   1.4, if one needs arrays of strings, it is recommended to use arrays of
   `dtype` `object_`, `bytes_` or `str_`, and use the free functions
   in the `numpy.char` module for fast vectorized string operations.

Some methods will only be available if the corresponding string method is
available in your version of Python.

The preferred alias for `defchararray` is `numpy.char`.


17"""

18import functools

20from .._utils import set_module

21from .numerictypes import (

22 bytes_, str_, integer, int_, object_, bool_, character)

23from .numeric import ndarray, compare_chararrays

24from .numeric import array as narray

25from numpy.core.multiarray import _vec_string

26from numpy.core import overrides

27from numpy.compat import asbytes

28import numpy

# Public names exported by this module (also reachable as ``numpy.char``).
__all__ = [
    # comparisons
    'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less',
    # element-wise string operations
    'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    # constructors
    'array', 'asarray',
]


_globalvar = 0

# Decorator factory used by every public function below: it is
# ``overrides.array_function_dispatch`` with ``module`` pre-bound to
# 'numpy.char'.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy.char',
)

def _is_unicode(arr):
    """
    Tell whether `arr` holds unicode text.

    Returns True when `arr` is a Python ``str``, or when converting it
    with ``numpy.asarray`` yields a dtype whose scalar type subclasses
    ``str``.  Returns False otherwise.
    """
    # A plain Python string needs no array conversion at all.
    if isinstance(arr, str):
        return True
    scalar_type = numpy.asarray(arr).dtype.type
    return issubclass(scalar_type, str)

def _to_bytes_or_str_array(result, output_dtype_like=None):
    """
    Turn an intermediate result array back into a string array.

    The elements of `result` are round-tripped through ``tolist`` and
    ``numpy.asarray``.  When `output_dtype_like` exposes a ``dtype``
    attribute, the converted array is additionally cast (without a forced
    copy) to that dtype's class, sized with ``_get_num_chars`` of the
    converted array.  Otherwise the converted array is returned as is.
    """
    converted = numpy.asarray(result.tolist())
    like_dtype = getattr(output_dtype_like, 'dtype', None)
    if like_dtype is None:
        return converted
    width = _get_num_chars(converted)
    return converted.astype(type(like_dtype)(width), copy=False)

def _clean_args(*args):
    """
    Trim an argument list for delegation to a Python string method.

    Many Python string methods with optional arguments do not accept
    ``None`` as "use the default".  This helper therefore keeps only the
    arguments that precede the first ``None`` and drops that ``None``
    together with everything after it.

    Returns
    -------
    list
        The leading arguments up to, but not including, the first ``None``.
    """
    cut = len(args)
    for position, value in enumerate(args):
        if value is None:
            cut = position
            break
    return list(args[:cut])

def _get_num_chars(a):
    """
    Return the number of characters per element of a string array.

    For arrays whose scalar type subclasses ``str_`` the item size is
    divided by 4; for every other array the item size is returned
    unchanged.
    """
    is_unicode = issubclass(a.dtype.type, str_)
    return a.itemsize // 4 if is_unicode else a.itemsize

def _binary_op_dispatcher(x1, x2):
    """Dispatcher for two-operand functions; returns both operands."""
    operands = (x1, x2)
    return operands

102

103

@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test ``x1 == x2`` element by element.

    In contrast to `numpy.equal`, whitespace at the end of each string is
    stripped before the comparison is made.  This mirrors numarray and is
    kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    # Last argument: strip trailing whitespace before comparing.
    strip_trailing = True
    return compare_chararrays(x1, x2, '==', strip_trailing)

128

129

@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element by element.

    In contrast to `numpy.not_equal`, whitespace at the end of each
    string is stripped before the comparison is made.  This mirrors
    numarray and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    # Last argument: strip trailing whitespace before comparing.
    strip_trailing = True
    return compare_chararrays(x1, x2, '!=', strip_trailing)

154

155

@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element by element.

    In contrast to `numpy.greater_equal`, whitespace at the end of each
    string is stripped before the comparison is made.  This mirrors
    numarray and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    # Last argument: strip trailing whitespace before comparing.
    strip_trailing = True
    return compare_chararrays(x1, x2, '>=', strip_trailing)

181

182

@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test ``x1 <= x2`` element by element.

    In contrast to `numpy.less_equal`, whitespace at the end of each
    string is stripped before the comparison is made.  This mirrors
    numarray and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    # Last argument: strip trailing whitespace before comparing.
    strip_trailing = True
    return compare_chararrays(x1, x2, '<=', strip_trailing)

207

208

@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test ``x1 > x2`` element by element.

    In contrast to `numpy.greater`, whitespace at the end of each string
    is stripped before the comparison is made.  This mirrors numarray and
    is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    # Last argument: strip trailing whitespace before comparing.
    strip_trailing = True
    return compare_chararrays(x1, x2, '>', strip_trailing)

233

234

@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The trailing ``True`` requests stripping of trailing whitespace
    # before the comparison.
    return compare_chararrays(x1, x2, '<', True)

259

260

def _unary_op_dispatcher(a):
    """Dispatcher for single-operand functions; returns ``(a,)``."""
    operands = (a,)
    return operands

263

264

@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute ``len`` of every element.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers

    See Also
    --------
    len

    Examples
    --------
    >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
    >>> np.char.str_len(a)
    array([23, 15])
    >>> a = np.array([u'\u0420', u'\u043e'])
    >>> np.char.str_len(a)
    array([1, 1])
    >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
    >>> np.char.str_len(a)
    array([[5, 5],
           [1, 1]])
    """
    # ``__len__`` yields Python ints rather than C integers.  ``intp`` would
    # normally be expected for lengths, but ``int_`` is enough because the
    # dtype itemsize bounds the possible length.
    lengths = _vec_string(a, int_, '__len__')
    return lengths

299

300

@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode element by element.

    Arrays `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array.
    x2 : array_like of str or unicode
        Input array.

    Returns
    -------
    add : ndarray
        Output array of `bytes_` or `str_`, depending on input types
        of the same shape as `x1` and `x2`.

    Raises
    ------
    TypeError
        If the dtype classes of the two inputs differ.
    """
    arr1 = numpy.asarray(x1)
    arr2 = numpy.asarray(x2)
    # The result can be at most as wide as both inputs put together.
    out_size = _get_num_chars(arr1) + _get_num_chars(arr2)

    dtype_cls = type(arr1.dtype)
    if dtype_cls != type(arr2.dtype):
        # Mixed dtype kinds are rejected for now; the long-term answer is
        # to implement add as a ufunc.  Historically bytes + unicode gave
        # nonsense, unicode + bytes errored, unicode + object used the
        # object itemsize as the character count, and bytes + void relied
        # on a dubious void->bytes promotion.
        raise TypeError(
            "np.char.add() requires both arrays of the same dtype kind, but "
            f"got dtypes: '{arr1.dtype}' and '{arr2.dtype}' (the few cases "
            "where this used to work often lead to incorrect results).")

    out_dtype = dtype_cls(out_size)
    return _vec_string(arr1, out_dtype, '__add__', (arr2,))

338

def _multiply_dispatcher(a, i):
    """Dispatcher for `multiply`; only `a` is returned for dispatch."""
    relevant = (a,)
    return relevant

341

342

@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return (a * i), that is string multiple concatenation,
    element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.

    Examples
    --------
    >>> a = np.array(["a", "b", "c"])
    >>> np.char.multiply(a, 3)
    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
    >>> i = np.array([1, 2, 3])
    >>> np.char.multiply(a, i)
    array(['a', 'bb', 'ccc'], dtype='<U3')
    >>> np.char.multiply(np.array(['a']), i)
    array(['a', 'aa', 'aaa'], dtype='<U3')
    >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
    >>> np.char.multiply(a, 3)
    array([['aaa', 'bbb', 'ccc'],
           ['ddd', 'eee', 'fff']], dtype='<U3')
    >>> np.char.multiply(a, i)
    array([['a', 'bb', 'ccc'],
           ['d', 'ee', 'fff']], dtype='<U3')
    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # ``max`` of a zero-size array raises, so an empty `i` is handled as a
    # repeat count of 0.  Negative counts are clamped to 0 as documented.
    largest = int(i_arr.max()) if i_arr.size else 0
    out_size = _get_num_chars(a_arr) * max(largest, 0)
    return _vec_string(
        a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))

388

389

def _mod_dispatcher(a, values):
    """Dispatcher for `mod`; returns both `a` and `values`."""
    relevant = (a, values)
    return relevant

392

393

@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Apply ``a % values`` (pre-Python 2.6 style string interpolation)
    element by element to a pair of array_likes of str or unicode.

    Parameters
    ----------
    a : array_like of str or unicode

    values : array_like of values
       These values will be element-wise interpolated into the string.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.__mod__

    """
    # Format into an object array first, then cast back to the string
    # dtype class of `a`.
    formatted = _vec_string(a, object_, '__mod__', (values,))
    return _to_bytes_or_str_array(formatted, a)

420

421

@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Copy `a`, capitalizing only the first character of every element.

    Calls `str.capitalize` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'],
          dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'],
          dtype='|S4')

    """
    strings = numpy.asarray(a)
    # The output keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'capitalize')

459

460

def _center_dispatcher(a, width, fillchar=None):
    """Dispatcher for `center`; only `a` is returned for dispatch."""
    relevant = (a,)
    return relevant

463

464

@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Copy `a`, centering every element in a string of length `width`.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.center

    Notes
    -----
    This function is intended to work with arrays of strings.  The
    fill character is not applied to numeric types.

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
    array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
    >>> np.char.center(c, width=9)
    array(['   a1b2  ', '   1b2a  ', '   b2a1  ', '   2a1b  '], dtype='<U9')
    >>> np.char.center(c, width=9, fillchar='*')
    array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
    >>> np.char.center(c, width=1)
    array(['a', '1', 'b', '2'], dtype='<U1')

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output dtype is sized by the largest requested width.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'center', (widths, fillchar))

516

517

def _count_dispatcher(a, sub, start=None, end=None):
    """Dispatcher shared by `count`, `find` and `index`; returns ``(a,)``."""
    relevant = (a,)
    return relevant

520

521

@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Count the non-overlapping occurrences of substring `sub` in the
    range [`start`, `end`] for every element.

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode
       The substring to search for.

    start, end : int, optional
       Optional arguments `start` and `end` are interpreted as slice
       notation to specify the range in which to count.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])

    """
    # `end` is only forwarded when it is not None.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'count', call_args)

566

567

def _code_dispatcher(a, encoding=None, errors=None):
    """Dispatcher shared by `decode` and `encode`; returns ``(a,)``."""
    relevant = (a,)
    return relevant

570

571

@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Apply ``bytes.decode`` to every element.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    :py:meth:`bytes.decode`

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
    >>> c
    array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
           b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
    >>> np.char.decode(c, encoding='cp037')
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    """
    # Arguments from the first None onwards are not forwarded.
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, object_, 'decode', codec_args)
    return _to_bytes_or_str_array(decoded)

616

617

@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Apply `str.encode` to every element.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the codecs
    module.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.

    """
    # Arguments from the first None onwards are not forwarded.
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, object_, 'encode', codec_args)
    return _to_bytes_or_str_array(encoded)

652

653

def _endswith_dispatcher(a, suffix, start=None, end=None):
    """Dispatcher for `endswith`; only `a` is returned for dispatch."""
    relevant = (a,)
    return relevant

656

657

@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Build a boolean array that is `True` where the string element in `a`
    ends with `suffix` and `False` elsewhere.

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    suffix : str

    start, end : int, optional
        With optional `start`, test beginning at that position.  With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Outputs an array of bools.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])

    """
    # `end` is only forwarded when it is not None.
    call_args = [suffix, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, bool_, 'endswith', call_args)

700

701

def _expandtabs_dispatcher(a, tabsize=None):
    """Dispatcher for `expandtabs`; only `a` is returned for dispatch."""
    relevant = (a,)
    return relevant

704

705

@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Copy every string element, replacing each tab character by one or
    more spaces.

    Calls `str.expandtabs` element-wise.

    The number of spaces depends on the current column and the given
    `tabsize`.  The column number is reset to zero after each newline
    occurring in the string.  This doesn't understand other non-printing
    characters or escape sequences.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    tabsize : int, optional
        Replace tabs with `tabsize` number of spaces.  If not given defaults
        to 8 spaces.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.expandtabs

    """
    # Expand into an object array first, then cast back to the string
    # dtype class of `a`.
    expanded = _vec_string(a, object_, 'expandtabs', (tabsize,))
    return _to_bytes_or_str_array(expanded, a)

740

741

@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    Locate, for every element, the lowest index at which substring `sub`
    is found, such that `sub` is contained in the range [`start`, `end`].

    Calls `str.find` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints.  Returns -1 if `sub` is not found.

    See Also
    --------
    str.find

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.char.find(a, "Python", start=0, end=None)
    array([11])

    """
    # `end` is only forwarded when it is not None.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'find', call_args)

782

783

@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.

    Raises
    ------
    ValueError
        If `sub` is not found in an element (raised by `str.index`).

    See Also
    --------
    find, str.index

    Examples
    --------
    >>> a = np.array(["Computer Science"])
    >>> np.char.index(a, "Science", start=0, end=None)
    array([9])

    """
    # `end` is only forwarded when it is not None, because the underlying
    # string method does not take None positionally as "use the default".
    return _vec_string(
        a, int_, 'index', [sub, start] + _clean_args(end))

817

818

@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalnum
    """
    # The result dtype is ``bool_``, matching the other ``is*`` predicates.
    return _vec_string(a, bool_, 'isalnum')

843

844

@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Report, for every element, whether all characters in the string are
    alphabetic and there is at least one character.

    Calls `str.isalpha` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalpha
    """
    flags = _vec_string(a, bool_, 'isalpha')
    return flags

869

870

@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Report, for every element, whether all characters in the string are
    digits and there is at least one character.

    Calls `str.isdigit` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isdigit

    Examples
    --------
    >>> a = np.array(['a', 'b', '0'])
    >>> np.char.isdigit(a)
    array([False, False,  True])
    >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
    >>> np.char.isdigit(a)
    array([[False, False,  True],
           [False,  True,  True]])
    """
    flags = _vec_string(a, bool_, 'isdigit')
    return flags

904

905

@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Report, for every element, whether all cased characters in the
    string are lowercase and there is at least one cased character.

    Calls `str.islower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.islower
    """
    flags = _vec_string(a, bool_, 'islower')
    return flags

931

932

@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Report, for every element, whether the string contains only
    whitespace characters and there is at least one character.

    Calls `str.isspace` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isspace
    """
    flags = _vec_string(a, bool_, 'isspace')
    return flags

958

959

@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Report, for every element, whether the element is a titlecased
    string and there is at least one character.

    Call `str.istitle` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.istitle
    """
    flags = _vec_string(a, bool_, 'istitle')
    return flags

984

985

@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Report, for every element, whether all cased characters in the
    string are uppercase and there is at least one character.

    Call `str.isupper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> str = "GHC"
    >>> np.char.isupper(str)
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.char.isupper(a)
    array([False,  True, False])

    """
    flags = _vec_string(a, bool_, 'isupper')
    return flags

1021

1022

def _join_dispatcher(sep, seq):
    """Dispatcher for `join`; returns both `sep` and `seq`."""
    relevant = (sep, seq)
    return relevant

1025

1026

@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Build a string that is the concatenation of the strings in the
    sequence `seq`.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.join

    Examples
    --------
    >>> np.char.join('-', 'osd')
    array('o-s-d', dtype='<U5')

    >>> np.char.join(['-', '.'], ['ghc', 'osd'])
    array(['g-h-c', 'o.s.d'], dtype='<U5')

    """
    # Join into an object array first, then cast back using the dtype
    # class of `seq`.
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_bytes_or_str_array(joined, seq)

1060

1061

1062

def _just_dispatcher(a, width, fillchar=None):
    """Dispatcher for the justify functions; returns ``(a,)``."""
    relevant = (a,)
    return relevant

1065

1066

@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Left-justify the elements of `a` in a string of length `width`.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.ljust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output dtype is sized by the largest requested width.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'ljust', (widths, fillchar))

1101

1102

@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert the elements of an array to lowercase.

    Call `str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')

    """
    strings = numpy.asarray(a)
    # The output keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lower')

1136

1137

def _strip_dispatcher(a, chars=None):
    """Dispatcher for the strip functions; returns ``(a,)``."""
    relevant = (a,)
    return relevant

1140

1141

@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    Copy every element of `a` with its leading characters removed.

    Calls `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.

    chars : {str, unicode}, optional
        The `chars` argument is a string specifying the set of
        characters to be removed.  If omitted or None, the `chars`
        argument defaults to removing whitespace.  The `chars` argument
        is not a prefix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    The 'a' in c[1] is not stripped because it is preceded by whitespace.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', '  aA  ', 'bBABba'], dtype='<U7')

    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
    ... # XXX: is this a regression? This used to return True
    ... # np.char.lstrip(c,'') does not modify c at all.
    False
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True

    """
    strings = numpy.asarray(a)
    # The output keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lstrip', (chars,))

1195

1196

def _partition_dispatcher(a, sep):
    """Dispatcher for `partition`; only `a` is returned for dispatch."""
    relevant = (a,)
    return relevant

1199

1200

@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Split every element of `a` around `sep`.

    Calls `str.partition` element-wise.

    Each element is split at the first occurrence of `sep`, giving 3
    strings: the part before the separator, the separator itself, and
    the part after the separator.  If the separator is not found, the 3
    strings are the element itself followed by two empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array
    sep : {str, unicode}
        Separator to split each string element in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        The output array will have an extra dimension with 3
        elements per input element.

    See Also
    --------
    str.partition

    """
    # Partition into an object array first, then cast back to the string
    # dtype class of `a`.
    pieces = _vec_string(a, object_, 'partition', (sep,))
    return _to_bytes_or_str_array(pieces, a)

1235

1236

def _replace_dispatcher(a, old, new, count=None):
    # Only `a` is offered for __array_function__ dispatch.
    relevant = (a,)
    return relevant

1239

1240

@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    Replace occurrences of substring `old` with `new` in every element
    of `a`.

    Each element is processed with `str.replace`.

    Parameters
    ----------
    a : array-like of str or unicode

    old, new : str or unicode

    count : int, optional
        When given, only the first `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.replace

    Examples
    --------
    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
    >>> np.char.replace(a, 'mango', 'banana')
    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')

    >>> a = np.array(["The dish is fresh", "This is it"])
    >>> np.char.replace(a, 'is', 'was')
    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
    """
    call_args = [old, new] + _clean_args(count)
    replaced = _vec_string(a, object_, 'replace', call_args)
    return _to_bytes_or_str_array(replaced, a)

1280

1281

@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    Find the highest index at which substring `sub` occurs in every
    element of `a`, with `sub` contained within [`start`, `end`].

    Each element is processed with `str.rfind`.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.  Return -1 on failure.

    See Also
    --------
    str.rfind

    """
    call_args = [sub, start] + _clean_args(end)
    return _vec_string(a, int_, 'rfind', call_args)

1313

1314

@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Same as `rfind`, except that `ValueError` is raised when the
    substring `sub` is not found.

    Each element is processed with `str.rindex`.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
       Output array of ints.

    See Also
    --------
    rfind, str.rindex

    """
    call_args = [sub, start] + _clean_args(end)
    return _vec_string(a, int_, 'rindex', call_args)

1343

1344

@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Right-justify every element of `a` in a string of length `width`.

    Each element is processed with `str.rjust`.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rjust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    out_len = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # bytes elements need a bytes fill character
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_len)
    return _vec_string(strings, out_dtype, 'rjust', (widths, fillchar))

1379

1380

@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Split every element of `a` around the right-most occurrence of
    `sep`.

    Each element is processed with `str.rpartition`, which yields three
    strings: the text before the separator, the separator itself, and
    the text after it.  If the separator is not found, return 3 strings
    containing the string itself, followed by two empty strings.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type.  The output array will have an extra dimension with
        3 elements per input element.

    See Also
    --------
    str.rpartition

    """
    # Collect the per-element 3-tuples as objects first, then convert
    # them back to a bytes/str array matching the input.
    pieces = _vec_string(a, object_, 'rpartition', (sep,))
    return _to_bytes_or_str_array(pieces, a)

1415

1416

def _split_dispatcher(a, sep=None, maxsplit=None):
    # Only `a` is offered for __array_function__ dispatch.
    relevant = (a,)
    return relevant

1419

1420

@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as
    the delimiter string.

    Each element is processed with `str.rsplit`.  Except for splitting
    from the right, `rsplit` behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
       Array of list objects

    See Also
    --------
    str.rsplit, split

    """
    # The per-element lists can differ in length, so the result stays
    # an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'rsplit', call_args)

1457

1458

def _strip_dispatcher(a, chars=None):
    # Only `a` is offered for __array_function__ dispatch.
    relevant = (a,)
    return relevant

1461

1462

@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    Strip trailing characters from every element of `a`.

    Each element is processed with `str.rstrip`.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        Set of characters to strip.  When omitted or None, trailing
        whitespace is stripped.  `chars` is a set of characters, not a
        suffix: every trailing character that occurs in it is removed.

    Returns
    -------
    out : ndarray
        Output array with the same dtype as the (array-converted) input.

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array([b'aAaAaA', b'abBABba'], dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array([b'aAaAaA', b'abBABb'], dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array([b'aAaAa', b'abBABba'], dtype='|S7')

    """
    strings = numpy.asarray(a)
    strip_args = (chars,)
    return _vec_string(strings, strings.dtype, 'rstrip', strip_args)

1506

1507

@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as
    the delimiter string.

    Each element is processed with `str.split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
       If `sep` is not specified or None, any whitespace string is a
       separator.

    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.split, rsplit

    """
    # The per-element lists can differ in length, so the result stays
    # an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'split', call_args)

1541

1542

def _splitlines_dispatcher(a, keepends=None):
    # Only `a` is offered for __array_function__ dispatch.
    relevant = (a,)
    return relevant

1545

1546

@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    Split every element of `a` into a list of its lines, breaking at
    line boundaries.

    Each element is processed with `str.splitlines`.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are not included in the resulting list unless
        keepends is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    """
    call_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', call_args)

1575

1576

def _startswith_dispatcher(a, prefix, start=None, end=None):
    # Only `a` is offered for __array_function__ dispatch.
    relevant = (a,)
    return relevant

1579

1580

@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Test, for every element of `a`, whether it starts with `prefix`.

    The result is a boolean array which is `True` where the string
    element starts with `prefix`, otherwise `False`.

    Each element is processed with `str.startswith`.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See Also
    --------
    str.startswith

    """
    call_args = [prefix, start] + _clean_args(end)
    return _vec_string(a, bool_, 'startswith', call_args)

1611

1612

@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    Strip leading and trailing characters from every element of `a`.

    Each element is processed with `str.strip`.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
       Set of characters to strip.  When omitted or None, whitespace is
       stripped.  `chars` is a set of characters, not a prefix or
       suffix: every leading or trailing character that occurs in it
       is removed.

    Returns
    -------
    out : ndarray
        Output array with the same dtype as the (array-converted) input.

    See Also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')

    """
    strings = numpy.asarray(a)
    strip_args = _clean_args(chars)
    return _vec_string(strings, strings.dtype, 'strip', strip_args)

1656

1657

@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Swap the case of every element: uppercase characters become
    lowercase and vice versa.

    Each element is processed with `str.swapcase`.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array with the same dtype as the (array-converted) input.

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array([b'a1B c', b'1b Ca', b'b Ca1', b'cA1b'], dtype='|S5')
    >>> np.char.swapcase(c)
    array([b'A1b C', b'1B cA', b'B cA1', b'Ca1B'], dtype='|S5')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'swapcase')

1694

1695

@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Convert every element of `a` to title case.

    Title case words start with uppercase characters, all remaining cased
    characters are lowercase.

    Each element is processed with `str.title`.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array with the same dtype as the (array-converted) input.

    See Also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array([b'a1b c', b'1b ca', b'b ca1', b'ca1b'], dtype='|S5')
    >>> np.char.title(c)
    array([b'A1B C', b'1B Ca', b'B Ca1', b'Ca1B'], dtype='|S5')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'title')

1734

1735

def _translate_dispatcher(a, table, deletechars=None):
    # Only `a` is offered for __array_function__ dispatch.
    relevant = (a,)
    return relevant

1738

1739

@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Map every element of `a` through a translation table, first
    removing all characters that occur in the optional argument
    `deletechars`.

    Each element is processed with `str.translate`.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str
        Only forwarded when the elements are not unicode strings;
        for unicode input this argument is not used.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    """
    strings = numpy.asarray(a)
    if not issubclass(strings.dtype.type, str_):
        # Non-unicode elements: pass `deletechars` along when it was given.
        call_args = [table] + _clean_args(deletechars)
        return _vec_string(strings, strings.dtype, 'translate', call_args)
    # Unicode elements: only the table is forwarded.
    return _vec_string(strings, strings.dtype, 'translate', (table,))

1775

1776

@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Convert every element of `a` to uppercase.

    Each element is processed with `str.upper`.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array with the same dtype as the (array-converted) input.

    See Also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'upper')

1810

1811

def _zfill_dispatcher(a, width):
    # Only `a` is offered for __array_function__ dispatch; `width` is ignored.
    relevant = (a,)
    return relevant

1814

1815

@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Left-fill every numeric string in `a` with zeros.

    Each element is processed with `str.zfill`.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.zfill

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    out_len = int(numpy.max(widths.flat))
    out_dtype = type(strings.dtype)(out_len)
    return _vec_string(strings, out_dtype, 'zfill', (widths,))

1845

1846

@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    Test, for every element, whether it contains only numeric
    characters.

    Each element is processed with `str.isnumeric`.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `a` is not a unicode string or unicode string array.

    See Also
    --------
    str.isnumeric

    Examples
    --------
    >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
    array([ True, False, False, False, False])

    """
    if _is_unicode(a):
        return _vec_string(a, bool_, 'isnumeric')
    raise TypeError("isnumeric is only available for Unicode strings and arrays")

1882

1883

@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    Test, for every element, whether it contains only decimal
    characters.

    Each element is processed with `str.isdecimal`.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `a` is not a unicode string or unicode string array.

    See Also
    --------
    str.isdecimal

    Examples
    --------
    >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
    array([ True, False, False, False])

    """
    if _is_unicode(a):
        return _vec_string(a, bool_, 'isdecimal')
    raise TypeError(
        "isdecimal is only available for Unicode strings and arrays")

1920

1921

1922@set_module('numpy')

1923class chararray(ndarray):

1924 """

1925 chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,

1926 strides=None, order=None)

1927

1928 Provides a convenient view on arrays of string and unicode values.

1929

1930 .. note::

1931 The `chararray` class exists for backwards compatibility with

1932 Numarray, it is not recommended for new development. Starting from numpy

1933 1.4, if one needs arrays of strings, it is recommended to use arrays of

1934 `dtype` `object_`, `bytes_` or `str_`, and use the free functions

1935 in the `numpy.char` module for fast vectorized string operations.

1936

1937 Versus a regular NumPy array of type `str` or `unicode`, this

1938 class adds the following functionality:

1939

1940 1) values automatically have whitespace removed from the end

1941 when indexed

1942

1943 2) comparison operators automatically remove whitespace from the

1944 end when comparing values

1945

1946 3) vectorized string operations are provided as methods

1947 (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)

1948

1949 chararrays should be created using `numpy.char.array` or

1950 `numpy.char.asarray`, rather than this constructor directly.

1951

1952 This constructor creates the array, using `buffer` (with `offset`

1953 and `strides`) if it is not ``None``. If `buffer` is ``None``, then

1954 constructs a new array with `strides` in "C order", unless both

1955 ``len(shape) >= 2`` and ``order='F'``, in which case `strides`

1956 is in "Fortran order".

1957

1958 Methods

1959 -------

1960 astype

1961 argsort

1962 copy

1963 count

1964 decode

1965 dump

1966 dumps

1967 encode

1968 endswith

1969 expandtabs

1970 fill

1971 find

1972 flatten

1973 getfield

1974 index

1975 isalnum

1976 isalpha

1977 isdecimal

1978 isdigit

1979 islower

1980 isnumeric

1981 isspace

1982 istitle

1983 isupper

1984 item

1985 join

1986 ljust

1987 lower

1988 lstrip

1989 nonzero

1990 put

1991 ravel

1992 repeat

1993 replace

1994 reshape

1995 resize

1996 rfind

1997 rindex

1998 rjust

1999 rsplit

2000 rstrip

2001 searchsorted

2002 setfield

2003 setflags

2004 sort

2005 split

2006 splitlines

2007 squeeze

2008 startswith

2009 strip

2010 swapaxes

2011 swapcase

2012 take

2013 title

2014 tofile

2015 tolist

2016 tostring

2017 translate

2018 transpose

2019 upper

2020 view

2021 zfill

2022

2023 Parameters

2024 ----------

2025 shape : tuple

2026 Shape of the array.

2027 itemsize : int, optional

2028 Length of each array element, in number of characters. Default is 1.

2029 unicode : bool, optional

2030 Are the array elements of type unicode (True) or string (False).

2031 Default is False.

2032 buffer : object exposing the buffer interface or str, optional

2033 Memory address of the start of the array data. Default is None,

2034 in which case a new array is created.

2035 offset : int, optional

2036 Fixed stride displacement from the beginning of an axis?

2037 Default is 0. Needs to be >=0.

2038 strides : array_like of ints, optional

2039 Strides for the array (see `ndarray.strides` for full description).

2040 Default is None.

2041 order : {'C', 'F'}, optional

2042 The order in which the array data is stored in memory: 'C' ->

2043 "row major" order (the default), 'F' -> "column major"

2044 (Fortran) order.

2045

2046 Examples

2047 --------

2048 >>> charar = np.chararray((3, 3))

2049 >>> charar[:] = 'a'

2050 >>> charar

2051 chararray([[b'a', b'a', b'a'],

2052 [b'a', b'a', b'a'],

2053 [b'a', b'a', b'a']], dtype='|S1')

2054

2055 >>> charar = np.chararray(charar.shape, itemsize=5)

2056 >>> charar[:] = 'abc'

2057 >>> charar

2058 chararray([[b'abc', b'abc', b'abc'],

2059 [b'abc', b'abc', b'abc'],

2060 [b'abc', b'abc', b'abc']], dtype='|S5')

2061

2062 """

2063 def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,

2064 offset=0, strides=None, order='C'):

2065 global _globalvar

2066

2067 if unicode:

2068 dtype = str_

2069 else:

2070 dtype = bytes_

2071

2072 # force itemsize to be a Python int, since using NumPy integer

2073 # types results in itemsize.itemsize being used as the size of

2074 # strings in the new array.

2075 itemsize = int(itemsize)

2076

2077 if isinstance(buffer, str):

2078 # unicode objects do not have the buffer interface

2079 filler = buffer

2080 buffer = None

2081 else:

2082 filler = None

2083

2084 _globalvar = 1

2085 if buffer is None:

2086 self = ndarray.__new__(subtype, shape, (dtype, itemsize),

2087 order=order)

2088 else:

2089 self = ndarray.__new__(subtype, shape, (dtype, itemsize),

2090 buffer=buffer,

2091 offset=offset, strides=strides,

2092 order=order)

2093 if filler is not None:

2094 self[...] = filler

2095 _globalvar = 0

2096 return self

2097

2098 def __array_finalize__(self, obj):

2099 # The b is a special case because it is used for reconstructing.

2100 if not _globalvar and self.dtype.char not in 'SUbc':

2101 raise ValueError("Can only create a chararray from string data.")

2102

2103 def __getitem__(self, obj):

2104 val = ndarray.__getitem__(self, obj)

2105

2106 if isinstance(val, character):

2107 temp = val.rstrip()

2108 if len(temp) == 0:

2109 val = ''

2110 else:

2111 val = temp

2112

2113 return val

2114

2115 # IMPLEMENTATION NOTE: Most of the methods of this class are

2116 # direct delegations to the free functions in this module.

2117 # However, those that return an array of strings should instead

2118 # return a chararray, so some extra wrapping is required.

2119

2120 def __eq__(self, other):

2121 """

2122 Return (self == other) element-wise.

2123

2124 See Also

2125 --------

2126 equal

2127 """

2128 return equal(self, other)

2129

2130 def __ne__(self, other):

2131 """

2132 Return (self != other) element-wise.

2133

2134 See Also

2135 --------

2136 not_equal

2137 """

2138 return not_equal(self, other)

2139

2140 def __ge__(self, other):

2141 """

2142 Return (self >= other) element-wise.

2143

2144 See Also

2145 --------

2146 greater_equal

2147 """

2148 return greater_equal(self, other)

2149

2150 def __le__(self, other):

2151 """

2152 Return (self <= other) element-wise.

2153

2154 See Also

2155 --------

2156 less_equal

2157 """

2158 return less_equal(self, other)

2159

2160 def __gt__(self, other):

2161 """

2162 Return (self > other) element-wise.

2163

2164 See Also

2165 --------

2166 greater

2167 """

2168 return greater(self, other)

2169

2170 def __lt__(self, other):

2171 """

2172 Return (self < other) element-wise.

2173

2174 See Also

2175 --------

2176 less

2177 """

2178 return less(self, other)

2179

2180 def __add__(self, other):

2181 """

2182 Return (self + other), that is string concatenation,

2183 element-wise for a pair of array_likes of str or unicode.

2184

2185 See Also

2186 --------

2187 add

2188 """

2189 return asarray(add(self, other))

2190

2191 def __radd__(self, other):

2192 """

2193 Return (other + self), that is string concatenation,

2194 element-wise for a pair of array_likes of `bytes_` or `str_`.

2195

2196 See Also

2197 --------

2198 add

2199 """

2200 return asarray(add(numpy.asarray(other), self))

2201

2202 def __mul__(self, i):

2203 """

2204 Return (self * i), that is string multiple concatenation,

2205 element-wise.

2206

2207 See Also

2208 --------

2209 multiply

2210 """

2211 return asarray(multiply(self, i))

2212

2213 def __rmul__(self, i):

2214 """

2215 Return (self * i), that is string multiple concatenation,

2216 element-wise.

2217

2218 See Also

2219 --------

2220 multiply

2221 """

2222 return asarray(multiply(self, i))

2223

2224 def __mod__(self, i):

2225 """

2226 Return (self % i), that is pre-Python 2.6 string formatting

2227 (interpolation), element-wise for a pair of array_likes of `bytes_`

2228 or `str_`.

2229

2230 See Also

2231 --------

2232 mod

2233 """

2234 return asarray(mod(self, i))

2235

2236 def __rmod__(self, other):

2237 return NotImplemented

2238

2239 def argsort(self, axis=-1, kind=None, order=None):

2240 """

2241 Return the indices that sort the array lexicographically.

2242

2243 For full documentation see `numpy.argsort`, for which this method is

2244 in fact merely a "thin wrapper."

2245

2246 Examples

2247 --------

2248 >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')

2249 >>> c = c.view(np.chararray); c

2250 chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],

2251 dtype='|S5')

2252 >>> c[c.argsort()]

2253 chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],

2254 dtype='|S5')

2255

2256 """

2257 return self.__array__().argsort(axis, kind, order)

2258 argsort.__doc__ = ndarray.argsort.__doc__

2259

2260 def capitalize(self):

2261 """

2262 Return a copy of `self` with only the first character of each element

2263 capitalized.

2264

2265 See Also

2266 --------

2267 char.capitalize

2268

2269 """

2270 return asarray(capitalize(self))

2271

2272 def center(self, width, fillchar=' '):

2273 """

2274 Return a copy of `self` with its elements centered in a

2275 string of length `width`.

2276

2277 See Also

2278 --------

2279 center

2280 """

2281 return asarray(center(self, width, fillchar))

2282

2283 def count(self, sub, start=0, end=None):

2284 """

2285 Returns an array with the number of non-overlapping occurrences of

2286 substring `sub` in the range [`start`, `end`].

2287

2288 See Also

2289 --------

2290 char.count

2291

2292 """

2293 return count(self, sub, start, end)

2294

2295 def decode(self, encoding=None, errors=None):

2296 """

2297 Calls ``bytes.decode`` element-wise.

2298

2299 See Also

2300 --------

2301 char.decode

2302

2303 """

2304 return decode(self, encoding, errors)

2305

2306 def encode(self, encoding=None, errors=None):

2307 """

2308 Calls `str.encode` element-wise.

2309

2310 See Also

2311 --------

2312 char.encode

2313

2314 """

2315 return encode(self, encoding, errors)

2316

2317 def endswith(self, suffix, start=0, end=None):

2318 """

2319 Returns a boolean array which is `True` where the string element

2320 in `self` ends with `suffix`, otherwise `False`.

2321

2322 See Also

2323 --------

2324 char.endswith

2325

2326 """

2327 return endswith(self, suffix, start, end)

2328

2329 def expandtabs(self, tabsize=8):

2330 """

2331 Return a copy of each string element where all tab characters are

2332 replaced by one or more spaces.

2333

2334 See Also

2335 --------

2336 char.expandtabs

2337

2338 """

2339 return asarray(expandtabs(self, tabsize))

2340

2341 def find(self, sub, start=0, end=None):

2342 """

2343 For each element, return the lowest index in the string where

2344 substring `sub` is found.

2345

2346 See Also

2347 --------

2348 char.find

2349

2350 """

2351 return find(self, sub, start, end)

2352

2353 def index(self, sub, start=0, end=None):

2354 """

2355 Like `find`, but raises `ValueError` when the substring is not found.

2356

2357 See Also

2358 --------

2359 char.index

2360

2361 """

2362 return index(self, sub, start, end)

2363

2364 def isalnum(self):

2365 """

2366 Returns true for each element if all characters in the string

2367 are alphanumeric and there is at least one character, false

2368 otherwise.

2369

2370 See Also

2371 --------

2372 char.isalnum

2373

2374 """

2375 return isalnum(self)

2376

2377 def isalpha(self):

2378 """

2379 Returns true for each element if all characters in the string

2380 are alphabetic and there is at least one character, false

2381 otherwise.

2382

2383 See Also

2384 --------

2385 char.isalpha

2386

2387 """

2388 return isalpha(self)

2389

2390 def isdigit(self):

2391 """

2392 Returns true for each element if all characters in the string are

2393 digits and there is at least one character, false otherwise.

2394

2395 See Also

2396 --------

2397 char.isdigit

2398

2399 """

2400 return isdigit(self)

2401

2402 def islower(self):

2403 """

2404 Returns true for each element if all cased characters in the

2405 string are lowercase and there is at least one cased character,

2406 false otherwise.

2407

2408 See Also

2409 --------

2410 char.islower

2411

2412 """

2413 return islower(self)

2414

2415 def isspace(self):

2416 """

2417 Returns true for each element if there are only whitespace

2418 characters in the string and there is at least one character,

2419 false otherwise.

2420

2421 See Also

2422 --------

2423 char.isspace

2424

2425 """

2426 return isspace(self)

2427

2428 def istitle(self):

2429 """

2430 Returns true for each element if the element is a titlecased

2431 string and there is at least one character, false otherwise.

2432

2433 See Also

2434 --------

2435 char.istitle

2436

2437 """

2438 return istitle(self)

2439

def isupper(self):
    """
    Test whether all cased characters of each element are uppercase
    (see `char.isupper` for the exact definition, including the
    non-empty requirement).

    Forwards `self` to the free function `char.isupper` and returns
    its result unchanged.

    See Also
    --------
    char.isupper

    """
    flags = isupper(self)
    return flags

2452

def join(self, seq):
    """
    Concatenate the strings of the sequence `seq`, using the elements
    of `self` as separators.

    Forwards ``(self, seq)`` to the free function `char.join` and
    returns its result unchanged.

    See Also
    --------
    char.join

    """
    joined = join(self, seq)
    return joined

2464

def ljust(self, width, fillchar=' '):
    """
    Left-justify every element of `self` in a string of length `width`.

    The work is done by the free function `char.ljust`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.ljust

    """
    padded = ljust(self, width, fillchar)
    return asarray(padded)

2476

def lower(self):
    """
    Convert every element of `self` to lowercase.

    The work is done by the free function `char.lower`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.lower

    """
    lowered = lower(self)
    return asarray(lowered)

2488

def lstrip(self, chars=None):
    """
    Return a copy of every element of `self` with its leading
    characters removed.

    The work is done by the free function `char.lstrip`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.lstrip

    """
    stripped = lstrip(self, chars)
    return asarray(stripped)

2500

def partition(self, sep):
    """
    Split every element of `self` around the separator `sep`.

    The work is done by the free function `partition`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    partition
    """
    parts = partition(self, sep)
    return asarray(parts)

2510

def replace(self, old, new, count=None):
    """
    Return a copy of every element of `self` in which occurrences of
    the substring `old` are replaced by `new`.

    The work is done by the free function `char.replace`, which also
    receives `count`; its result is passed through `asarray` before
    being returned.

    See Also
    --------
    char.replace

    """
    substituted = replace(self, old, new, count)
    return asarray(substituted)

2522

def rfind(self, sub, start=0, end=None):
    """
    Find, for every element of `self`, the highest index at which the
    substring `sub` occurs within [`start`, `end`].

    Forwards all arguments to the free function `char.rfind` and
    returns its result unchanged.

    See Also
    --------
    char.rfind

    """
    positions = rfind(self, sub, start, end)
    return positions

2535

def rindex(self, sub, start=0, end=None):
    """
    Behave like `rfind`, except that a `ValueError` is raised when the
    substring `sub` is not found.

    Forwards all arguments to the free function `char.rindex` and
    returns its result unchanged.

    See Also
    --------
    char.rindex

    """
    positions = rindex(self, sub, start, end)
    return positions

2547

def rjust(self, width, fillchar=' '):
    """
    Right-justify every element of `self` in a string of length
    `width`.

    The work is done by the free function `char.rjust`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.rjust

    """
    padded = rjust(self, width, fillchar)
    return asarray(padded)

2559

def rpartition(self, sep):
    """
    Split every element of `self` around the separator `sep`.

    The work is done by the free function `rpartition`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    rpartition
    """
    parts = rpartition(self, sep)
    return asarray(parts)

2569

def rsplit(self, sep=None, maxsplit=None):
    """
    Break every element of `self` into a list of words, with `sep`
    acting as the delimiter string.

    Forwards all arguments to the free function `char.rsplit` and
    returns its result unchanged.

    See Also
    --------
    char.rsplit

    """
    words = rsplit(self, sep, maxsplit)
    return words

2581

def rstrip(self, chars=None):
    """
    Return a copy of every element of `self` with its trailing
    characters removed.

    The work is done by the free function `char.rstrip`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.rstrip

    """
    stripped = rstrip(self, chars)
    return asarray(stripped)

2593

def split(self, sep=None, maxsplit=None):
    """
    Break every element of `self` into a list of words, with `sep`
    acting as the delimiter string.

    Forwards all arguments to the free function `char.split` and
    returns its result unchanged.

    See Also
    --------
    char.split

    """
    words = split(self, sep, maxsplit)
    return words

2605

def splitlines(self, keepends=None):
    """
    Break every element of `self` into a list of its lines, splitting
    at line boundaries.

    Forwards ``(self, keepends)`` to the free function
    `char.splitlines` and returns its result unchanged.

    See Also
    --------
    char.splitlines

    """
    lines = splitlines(self, keepends)
    return lines

2617

def startswith(self, prefix, start=0, end=None):
    """
    Build a boolean array that is `True` wherever the string element
    of `self` starts with `prefix` and `False` elsewhere.

    Forwards all arguments to the free function `char.startswith` and
    returns its result unchanged.

    See Also
    --------
    char.startswith

    """
    matches = startswith(self, prefix, start, end)
    return matches

2629

def strip(self, chars=None):
    """
    Return a copy of every element of `self` with both its leading and
    trailing characters removed.

    The work is done by the free function `char.strip`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.strip

    """
    stripped = strip(self, chars)
    return asarray(stripped)

2641

def swapcase(self):
    """
    Return a copy of every element of `self` in which uppercase
    characters become lowercase and lowercase characters become
    uppercase.

    The work is done by the free function `char.swapcase`; its result
    is passed through `asarray` before being returned.

    See Also
    --------
    char.swapcase

    """
    swapped = swapcase(self)
    return asarray(swapped)

2653

def title(self):
    """
    Return a titlecased version of every element of `self`: each word
    begins with an uppercase character and all other cased characters
    are lowercase.

    The work is done by the free function `char.title`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.title

    """
    titled = title(self)
    return asarray(titled)

2666

def translate(self, table, deletechars=None):
    """
    Return a copy of every element of `self` in which the characters
    listed in the optional `deletechars` are dropped and the remaining
    characters are mapped through the translation table `table`.

    The work is done by the free function `char.translate`; its result
    is passed through `asarray` before being returned.

    See Also
    --------
    char.translate

    """
    translated = translate(self, table, deletechars)
    return asarray(translated)

2680

def upper(self):
    """
    Convert every element of `self` to uppercase.

    The work is done by the free function `char.upper`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.upper

    """
    uppered = upper(self)
    return asarray(uppered)

2692

def zfill(self, width):
    """
    Left-fill the numeric string in every element of `self` with
    zeros, producing strings of length `width`.

    The work is done by the free function `char.zfill`; its result is
    passed through `asarray` before being returned.

    See Also
    --------
    char.zfill

    """
    filled = zfill(self, width)
    return asarray(filled)

2704

def isnumeric(self):
    """
    Report, for every element of `self`, whether it contains only
    numeric characters.

    Forwards `self` to the free function `char.isnumeric` and returns
    its result unchanged.

    See Also
    --------
    char.isnumeric

    """
    flags = isnumeric(self)
    return flags

2716

def isdecimal(self):
    """
    Report, for every element of `self`, whether it contains only
    decimal characters.

    Forwards `self` to the free function `char.isdecimal` and returns
    its result unchanged.

    See Also
    --------
    char.isdecimal

    """
    flags = isdecimal(self)
    return flags

2728

2729

2730@set_module("numpy.char")

2731def array(obj, itemsize=None, copy=True, unicode=None, order=None):

2732 """

2733 Create a `chararray`.

2734

2735 .. note::

2736 This class is provided for numarray backward-compatibility.

2737 New code (not concerned with numarray compatibility) should use

2738 arrays of type `bytes_` or `str_` and use the free functions

2739 in :mod:`numpy.char <numpy.core.defchararray>` for fast

2740 vectorized string operations instead.

2741

2742 Versus a regular NumPy array of type `str` or `unicode`, this

2743 class adds the following functionality:

2744

2745 1) values automatically have whitespace removed from the end

2746 when indexed

2747

2748 2) comparison operators automatically remove whitespace from the

2749 end when comparing values

2750

2751 3) vectorized string operations are provided as methods

2752 (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

2753

2754 Parameters

2755 ----------

2756 obj : array of str or unicode-like

2757

2758 itemsize : int, optional

2759 `itemsize` is the number of characters per scalar in the

2760 resulting array. If `itemsize` is None, and `obj` is an

2761 object array or a Python list, the `itemsize` will be

2762 automatically determined. If `itemsize` is provided and `obj`

2763 is of type str or unicode, then the `obj` string will be

2764 chunked into `itemsize` pieces.

2765

2766 copy : bool, optional

2767 If true (default), then the object is copied. Otherwise, a copy

2768 will only be made if __array__ returns a copy, if obj is a

2769 nested sequence, or if a copy is needed to satisfy any of the other

2770 requirements (`itemsize`, unicode, `order`, etc.).

2771

2772 unicode : bool, optional

2773 When true, the resulting `chararray` can contain Unicode

2774 characters, when false only 8-bit characters. If unicode is

2775 None and `obj` is one of the following:

2776

2777 - a `chararray`,

2778 - an ndarray of type `str` or `unicode`

2779 - a Python str or unicode object,

2780

2781 then the unicode setting of the output array will be

2782 automatically determined.

2783

2784 order : {'C', 'F', 'A'}, optional

2785 Specify the order of the array. If order is 'C' (default), then the

2786 array will be in C-contiguous order (last-index varies the

2787 fastest). If order is 'F', then the returned array

2788 will be in Fortran-contiguous order (first-index varies the

2789 fastest). If order is 'A', then the returned array may

2790 be in any order (either C-, Fortran-contiguous, or even

2791 discontiguous).

2792 """

2793 if isinstance(obj, (bytes, str)):

2794 if unicode is None:

2795 if isinstance(obj, str):

2796 unicode = True

2797 else:

2798 unicode = False

2799

2800 if itemsize is None:

2801 itemsize = len(obj)

2802 shape = len(obj) // itemsize

2803

2804 return chararray(shape, itemsize=itemsize, unicode=unicode,

2805 buffer=obj, order=order)

2806

2807 if isinstance(obj, (list, tuple)):

2808 obj = numpy.asarray(obj)

2809

2810 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):

2811 # If we just have a vanilla chararray, create a chararray

2812 # view around it.

2813 if not isinstance(obj, chararray):

2814 obj = obj.view(chararray)

2815

2816 if itemsize is None:

2817 itemsize = obj.itemsize

2818 # itemsize is in 8-bit chars, so for Unicode, we need

2819 # to divide by the size of a single Unicode character,

2820 # which for NumPy is always 4

2821 if issubclass(obj.dtype.type, str_):

2822 itemsize //= 4

2823

2824 if unicode is None:

2825 if issubclass(obj.dtype.type, str_):

2826 unicode = True

2827 else:

2828 unicode = False

2829

2830 if unicode:

2831 dtype = str_

2832 else:

2833 dtype = bytes_

2834

2835 if order is not None:

2836 obj = numpy.asarray(obj, order=order)

2837 if (copy or

2838 (itemsize != obj.itemsize) or

2839 (not unicode and isinstance(obj, str_)) or

2840 (unicode and isinstance(obj, bytes_))):

2841 obj = obj.astype((dtype, int(itemsize)))

2842 return obj

2843

2844 if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):

2845 if itemsize is None:

2846 # Since no itemsize was specified, convert the input array to

2847 # a list so the ndarray constructor will automatically

2848 # determine the itemsize for us.

2849 obj = obj.tolist()

2850 # Fall through to the default case

2851

2852 if unicode:

2853 dtype = str_

2854 else:

2855 dtype = bytes_

2856

2857 if itemsize is None:

2858 val = narray(obj, dtype=dtype, order=order, subok=True)

2859 else:

2860 val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)

2861 return val.view(chararray)

2862

2863

@set_module("numpy.char")
def asarray(obj, itemsize=None, unicode=None, order=None):
    """
    Convert the input to a `chararray`, copying the data only if
    necessary.

    This is `array` called with ``copy=False``; every other argument is
    handed over as given.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators
         (e.g. ``+``, ``*``, ``%``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).
    """
    converted = array(
        obj, itemsize, copy=False, unicode=unicode, order=order)
    return converted