Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/numpy/core/defchararray.py: 45%

1"""

This module contains a set of functions for vectorized string
operations and methods.

.. note::
   The `chararray` class exists for backwards compatibility with
   Numarray; it is not recommended for new development. Starting from numpy
   1.4, if one needs arrays of strings, it is recommended to use arrays of
   `dtype` `object_`, `string_` or `unicode_`, and use the free functions
   in the `numpy.char` module for fast vectorized string operations.

Some methods will only be available if the corresponding string method is
available in your version of Python.

The preferred alias for `defchararray` is `numpy.char`.


17"""

18import functools

19from .numerictypes import (

20 string_, unicode_, integer, int_, object_, bool_, character)

21from .numeric import ndarray, compare_chararrays

22from .numeric import array as narray

23from numpy.core.multiarray import _vec_string

24from numpy.core.overrides import set_module

25from numpy.core import overrides

26from numpy.compat import asbytes

27import numpy

# Names exported by ``from numpy.core.defchararray import *``.
__all__ = [
    'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    'array', 'asarray'
    ]


# NOTE(review): not referenced in the part of the module visible here;
# kept unchanged in case other code reads it.
_globalvar = 0

# Dispatch decorator used by every public function below; it is the generic
# ``overrides.array_function_dispatch`` pre-bound to ``module='numpy.char'``.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy.char')

def _use_unicode(*args):
    """
    Helper function for determining the output type of some string
    operations.

    For an operation on two ndarrays, if at least one is unicode, the
    result should be unicode.

    Parameters
    ----------
    *args : str, bytes or array_like
        The operands of the string operation.

    Returns
    -------
    type
        `unicode_` if any argument is a Python ``str`` or converts to an
        array whose dtype type is a subclass of `unicode_`; otherwise
        `string_` (also when called with no arguments).
    """
    for x in args:
        if (isinstance(x, str) or
                issubclass(numpy.asarray(x).dtype.type, unicode_)):
            return unicode_
    return string_

def _to_string_or_unicode_array(result):
    """
    Helper function to cast a result back into a string or unicode array
    if an object array must be used as an intermediary.

    Parameters
    ----------
    result : ndarray
        Array to convert; it is round-tripped through ``result.tolist()``.

    Returns
    -------
    ndarray
        ``numpy.asarray`` of the nested list, so the dtype is whatever
        numpy infers from the list contents.
    """
    return numpy.asarray(result.tolist())

def _clean_args(*args):
    """
    Helper function for delegating arguments to Python string
    functions.

    Many of the Python string operations that have optional arguments
    do not use 'None' to indicate a default value.  In these cases,
    we need to remove all None arguments, and those following them.

    Parameters
    ----------
    *args : object
        Positional arguments in the order the string method expects them.

    Returns
    -------
    list
        The leading arguments up to, but not including, the first ``None``.
    """
    newargs = []
    for chk in args:
        # Everything from the first None onwards is dropped, even if later
        # arguments are not None, because they are positional.
        if chk is None:
            break
        newargs.append(chk)
    return newargs

def _get_num_chars(a):
    """
    Helper function that returns the number of characters per field in
    a string or unicode array.  This is to abstract out the fact that
    for a unicode array this is itemsize / 4.

    Parameters
    ----------
    a : ndarray
        Array whose ``dtype`` and ``itemsize`` are inspected.

    Returns
    -------
    int
        ``a.itemsize // 4`` for unicode arrays, ``a.itemsize`` otherwise.
    """
    if issubclass(a.dtype.type, unicode_):
        return a.itemsize // 4
    return a.itemsize

def _binary_op_dispatcher(x1, x2):
    """Return both operands as the arguments relevant for dispatch."""
    return (x1, x2)

@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Return (x1 == x2) element-wise.

    Unlike `numpy.equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    # The trailing True requests the whitespace-stripping comparison
    # described above.
    return compare_chararrays(x1, x2, '==', True)

124

125

@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Return (x1 != x2) element-wise.

    Unlike `numpy.not_equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    # The trailing True requests the whitespace-stripping comparison
    # described above.
    return compare_chararrays(x1, x2, '!=', True)

150

151

@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Return (x1 >= x2) element-wise.

    Unlike `numpy.greater_equal`, this comparison is performed by
    first stripping whitespace characters from the end of the string.
    This behavior is provided for backward-compatibility with
    numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    # The trailing True requests the whitespace-stripping comparison
    # described above.
    return compare_chararrays(x1, x2, '>=', True)

177

178

@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Return (x1 <= x2) element-wise.

    Unlike `numpy.less_equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    # The trailing True requests the whitespace-stripping comparison
    # described above.
    return compare_chararrays(x1, x2, '<=', True)

203

204

@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Return (x1 > x2) element-wise.

    Unlike `numpy.greater`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    # The trailing True requests the whitespace-stripping comparison
    # described above.
    return compare_chararrays(x1, x2, '>', True)

229

230

@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The trailing True requests the whitespace-stripping comparison
    # described above.
    return compare_chararrays(x1, x2, '<', True)

255

256

def _unary_op_dispatcher(a):
    """Return the single operand as the argument relevant for dispatch."""
    return (a,)

259

260

@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Return len(a) element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers

    See Also
    --------
    builtins.len

    Examples
    --------
    >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
    >>> np.char.str_len(a)
    array([23, 15])
    >>> a = np.array([u'\u0420', u'\u043e'])
    >>> np.char.str_len(a)
    array([1, 1])
    >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
    >>> np.char.str_len(a)
    array([[5, 5],
           [1, 1]])
    """
    # Note: __len__, etc. currently return ints, which are not C-integers.
    # Generally intp would be expected for lengths, although int is sufficient
    # due to the dtype itemsize limitation.
    return _vec_string(a, int_, '__len__')

295

296

@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Return element-wise string concatenation for two arrays of str or unicode.

    Arrays `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array.
    x2 : array_like of str or unicode
        Input array.

    Returns
    -------
    add : ndarray
        Output array of `string_` or `unicode_`, depending on input types
        of the same shape as `x1` and `x2`.

    """
    arr1 = numpy.asarray(x1)
    arr2 = numpy.asarray(x2)
    # The widest possible result is the sum of both per-element widths.
    out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
    dtype = _use_unicode(arr1, arr2)
    return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))

323

324

def _multiply_dispatcher(a, i):
    """Return only `a` as relevant for dispatch; `i` is ignored."""
    return (a,)

327

328

@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return (a * i), that is string multiple concatenation,
    element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.

    Examples
    --------
    >>> a = np.array(["a", "b", "c"])
    >>> np.char.multiply(a, 3)
    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
    >>> i = np.array([1, 2, 3])
    >>> np.char.multiply(a, i)
    array(['a', 'bb', 'ccc'], dtype='<U3')
    >>> np.char.multiply(np.array(['a']), i)
    array(['a', 'aa', 'aaa'], dtype='<U3')
    >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
    >>> np.char.multiply(a, 3)
    array([['aaa', 'bbb', 'ccc'],
           ['ddd', 'eee', 'fff']], dtype='<U3')
    >>> np.char.multiply(a, i)
    array([['a', 'bb', 'ccc'],
           ['d', 'ee', 'fff']], dtype='<U3')
    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # ``initial=0`` clamps negative repeat counts to zero (as the old
    # ``max(..., 0)`` did) and also gives the reduction an identity, so an
    # empty `i` no longer fails inside ``max()``.
    out_size = _get_num_chars(a_arr) * int(i_arr.max(initial=0))
    return _vec_string(
        a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))

374

375

def _mod_dispatcher(a, values):
    """Return both `a` and `values` as relevant for dispatch."""
    return (a, values)

378

379

@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Return (a % i), that is pre-Python 2.6 string formatting
    (interpolation), element-wise for a pair of array_likes of str
    or unicode.

    Parameters
    ----------
    a : array_like of str or unicode

    values : array_like of values
       These values will be element-wise interpolated into the string.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.__mod__

    """
    # The formatted width is not known up front, so the result is built as
    # an object array and converted back afterwards.
    return _to_string_or_unicode_array(
        _vec_string(a, object_, '__mod__', (values,)))

406

407

@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Return a copy of `a` with only the first character of each element
    capitalized.

    Calls `str.capitalize` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'], dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'], dtype='|S4')

    """
    a_arr = numpy.asarray(a)
    return _vec_string(a_arr, a_arr.dtype, 'capitalize')

445

446

def _center_dispatcher(a, width, fillchar=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

449

450

@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Return a copy of `a` with its elements centered in a string of
    length `width`.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int or array_like of ints
        The length of the resulting strings
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.center

    Notes
    -----
    This function is intended to work with arrays of strings.  The
    fill character is not applied to numeric types.

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
    array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
    >>> np.char.center(c, width=9)
    array(['   a1b2  ', '   1b2a  ', '   b2a1  ', '   2a1b  '], dtype='<U9')
    >>> np.char.center(c, width=9, fillchar='*')
    array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
    >>> np.char.center(c, width=1)
    array(['a', '1', 'b', '2'], dtype='<U1')

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # The output itemsize is the largest requested width.
    size = int(numpy.max(width_arr.flat))
    # bytes.center needs a bytes fill character.
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))

502

503

def _count_dispatcher(a, sub, start=None, end=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

506

507

@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Returns an array with the number of non-overlapping occurrences of
    substring `sub` in the range [`start`, `end`].

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode
       The substring to search for.

    start, end : int, optional
       Optional arguments `start` and `end` are interpreted as slice
       notation to specify the range in which to count.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])

    """
    # `end` is only forwarded when it is not None.
    return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))

552

553

def _code_dispatcher(a, encoding=None, errors=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

556

557

@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Calls ``bytes.decode`` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like of bytes

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    :py:meth:`bytes.decode`

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
    >>> c
    array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
           b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
    >>> np.char.decode(c, encoding='cp037')
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    """
    # Arguments from the first None onwards are dropped so that
    # bytes.decode falls back to its own defaults.
    return _to_string_or_unicode_array(
        _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))

602

603

@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Calls `str.encode` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime. For more information, see the codecs
    module.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.

    """
    # Arguments from the first None onwards are dropped so that
    # str.encode falls back to its own defaults.
    return _to_string_or_unicode_array(
        _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))

638

639

def _endswith_dispatcher(a, suffix, start=None, end=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

642

643

@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Returns a boolean array which is `True` where the string element
    in `a` ends with `suffix`, otherwise `False`.

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    suffix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Outputs an array of bools.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s[0] = 'foo'
    >>> s[1] = 'bar'
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])

    """
    # `end` is only forwarded when it is not None.
    return _vec_string(
        a, bool_, 'endswith', [suffix, start] + _clean_args(end))

686

687

def _expandtabs_dispatcher(a, tabsize=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

690

691

@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Return a copy of each string element where all tab characters are
    replaced by one or more spaces.

    Calls `str.expandtabs` element-wise.

    The number of spaces inserted depends on the current column
    and the given `tabsize`. The column number is reset to zero after
    each newline occurring in the string. This doesn't understand other
    non-printing characters or escape sequences.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    tabsize : int, optional
        Replace tabs with `tabsize` number of spaces.  If not given defaults
        to 8 spaces.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.expandtabs

    """
    # The expanded width is not known up front, so the result is built as
    # an object array and converted back afterwards.
    return _to_string_or_unicode_array(
        _vec_string(a, object_, 'expandtabs', (tabsize,)))

726

727

@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    For each element, return the lowest index in the string where
    substring `sub` is found.

    Calls `str.find` element-wise.

    For each element, return the lowest index in the string where
    substring `sub` is found, such that `sub` is contained in the
    range [`start`, `end`].

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints.  Returns -1 if `sub` is not found.

    See Also
    --------
    str.find

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.char.find(a, "Python", start=0, end=None)
    array([11])

    """
    # `end` is only forwarded when it is not None.
    return _vec_string(
        a, int_, 'find', [sub, start] + _clean_args(end))

768

769

@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.

    Raises
    ------
    ValueError
        If `sub` is not found in an element.

    See Also
    --------
    find, str.index

    Examples
    --------
    >>> a = np.array(["Computer Science"])
    >>> np.char.index(a, "Science", start=0, end=None)
    array([9])

    """
    # `end` is only forwarded when it is not None.
    return _vec_string(
        a, int_, 'index', [sub, start] + _clean_args(end))

803

804

@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalnum
    """
    return _vec_string(a, bool_, 'isalnum')

829

830

@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Returns true for each element if all characters in the string are
    alphabetic and there is at least one character, false otherwise.

    Calls `str.isalpha` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalpha
    """
    return _vec_string(a, bool_, 'isalpha')

855

856

@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Returns true for each element if all characters in the string are
    digits and there is at least one character, false otherwise.

    Calls `str.isdigit` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isdigit

    Examples
    --------
    >>> a = np.array(['a', 'b', '0'])
    >>> np.char.isdigit(a)
    array([False, False,  True])
    >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
    >>> np.char.isdigit(a)
    array([[False, False,  True],
           [False,  True,  True]])
    """
    return _vec_string(a, bool_, 'isdigit')

890

891

@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Returns true for each element if all cased characters in the
    string are lowercase and there is at least one cased character,
    false otherwise.

    Calls `str.islower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.islower
    """
    return _vec_string(a, bool_, 'islower')

917

918

@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Returns true for each element if there are only whitespace
    characters in the string and there is at least one character,
    false otherwise.

    Calls `str.isspace` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isspace
    """
    return _vec_string(a, bool_, 'isspace')

944

945

@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Returns true for each element if the element is a titlecased
    string and there is at least one character, false otherwise.

    Calls `str.istitle` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.istitle
    """
    return _vec_string(a, bool_, 'istitle')

970

971

@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Return true for each element if all cased characters in the
    string are uppercase and there is at least one cased character,
    false otherwise.

    Calls `str.isupper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> s = "GHC"
    >>> np.char.isupper(s)
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.char.isupper(a)
    array([False,  True, False])

    """
    return _vec_string(a, bool_, 'isupper')

1007

1008

def _join_dispatcher(sep, seq):
    """Return both `sep` and `seq` as relevant for dispatch."""
    return (sep, seq)

1011

1012

@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Return a string which is the concatenation of the strings in the
    sequence `seq`.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.join

    Examples
    --------
    >>> np.char.join('-', 'osd')
    array('o-s-d', dtype='<U5')

    >>> np.char.join(['-', '.'], ['ghc', 'osd'])
    array(['g-h-c', 'o.s.d'], dtype='<U5')

    """
    # The joined width is not known up front, so the result is built as
    # an object array and converted back afterwards.
    return _to_string_or_unicode_array(
        _vec_string(sep, object_, 'join', (seq,)))

1046

1047

1048

def _just_dispatcher(a, width, fillchar=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

1051

1052

@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` left-justified in a
    string of length `width`.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int or array_like of ints
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.ljust

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # The output itemsize is the largest requested width.
    size = int(numpy.max(width_arr.flat))
    # bytes.ljust needs a bytes fill character.
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))

1087

1088

@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Return an array with the elements converted to lowercase.

    Calls `str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')

    """
    a_arr = numpy.asarray(a)
    return _vec_string(a_arr, a_arr.dtype, 'lower')

1122

1123

def _strip_dispatcher(a, chars=None):
    """Return only `a` as relevant for dispatch."""
    return (a,)

1126

1127

@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    For each element in `a`, return a copy with the leading characters
    removed.

    Calls `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.

    chars : {str, unicode}, optional
        The `chars` argument is a string specifying the set of
        characters to be removed. If omitted or None, the `chars`
        argument defaults to removing whitespace. The `chars` argument
        is not a prefix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    The 'a' in c[1] is not stripped because that element starts with
    whitespace.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', '  aA  ', 'bBABba'], dtype='<U7')


    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
    ... # XXX: is this a regression? This used to return True
    ... # np.char.lstrip(c,'') does not modify c at all.
    False
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True

    """
    a_arr = numpy.asarray(a)
    # Stripping can only shorten elements, so the input dtype is reused.
    return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))

1181

1182

def _partition_dispatcher(a, sep):
    """Return only `a` as relevant for dispatch; `sep` is ignored."""
    return (a,)

1185

1186

@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Partition each element in `a` around `sep`.

    Calls `str.partition` element-wise.

    For each element in `a`, split the element at the first
    occurrence of `sep`, and return 3 strings containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, return 3 strings
    containing the string itself, followed by two empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array
    sep : {str, unicode}
        Separator to split each string element in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        The output array will have an extra dimension with 3
        elements per input element.

    See Also
    --------
    str.partition

    """
    # Each element becomes a 3-tuple in an object array; converting back
    # through a nested list produces the extra dimension.
    return _to_string_or_unicode_array(
        _vec_string(a, object_, 'partition', (sep,)))

1221

1222

1223def _replace_dispatcher(a, old, new, count=None):

1224 return (a,)

1225

1226

@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    Return a copy of each element of `a` in which occurrences of the
    substring `old` have been replaced by `new`.

    Applies `str.replace` to each element.

    Parameters
    ----------
    a : array-like of str or unicode

    old, new : str or unicode

    count : int, optional
        When given, only the first `count` occurrences are replaced;
        otherwise all of them are.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.replace

    Examples
    --------
    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
    >>> np.char.replace(a, 'mango', 'banana')
    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')

    >>> a = np.array(["The dish is fresh", "This is it"])
    >>> np.char.replace(a, 'is', 'was')
    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
    """
    call_args = [old, new] + _clean_args(count)
    replaced = _vec_string(a, object_, 'replace', call_args)
    return _to_string_or_unicode_array(replaced)

1267

1268

@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    Return, for each element of `a`, the highest index at which the
    substring `sub` is found, with `sub` lying within
    [`start`, `end`].

    Applies `str.rfind` to each element.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Interpreted as in slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.  An entry is -1 where `sub` is not found.

    See Also
    --------
    str.rfind

    """
    call_args = [sub, start] + _clean_args(end)
    return _vec_string(a, int_, 'rfind', call_args)

1300

1301

@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Same as `rfind`, except that a `ValueError` is raised when the
    substring `sub` is not found.

    Applies `str.rindex` to each element.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    rfind, str.rindex

    """
    call_args = [sub, start] + _clean_args(end)
    return _vec_string(a, int_, 'rindex', call_args)

1330

1331

@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Right-justify each element of `a` in a string of length `width`.

    Applies `str.rjust` to each element.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rjust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    out_len = int(numpy.max(widths.flat))
    # Byte-string input needs a bytes fill character.
    if numpy.issubdtype(strings.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    out_dtype = (strings.dtype.type, out_len)
    return _vec_string(strings, out_dtype, 'rjust', (widths, fillchar))

1366

1367

@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    For each element in `a`, split the element at the last
    occurrence of `sep`, and return 3 strings containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, return 3 strings
    containing two empty strings, followed by the string itself
    (this is the `str.rpartition` convention, the mirror image of
    `str.partition`).

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type. The output array will have an extra dimension with
        3 elements per input element.

    See Also
    --------
    str.rpartition

    """
    pieces = _vec_string(a, object_, 'rpartition', (sep,))
    return _to_string_or_unicode_array(pieces)

1402

1403

1404def _split_dispatcher(a, sep=None, maxsplit=None):

1405 return (a,)

1406

1407

@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    Return, for each element of `a`, the list of words in the string,
    with `sep` acting as the delimiter string.

    Applies `str.rsplit` to each element.

    Apart from splitting from the right, `rsplit` behaves like
    `split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        When `sep` is omitted or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        When given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.rsplit, split

    """
    # The per-element lists can differ in length, so the result is
    # kept as an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'rsplit', call_args)

1444

1445

1446def _strip_dispatcher(a, chars=None):

1447 return (a,)

1448

1449

@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    Return a copy of each element of `a` with its trailing characters
    removed.

    Applies `str.rstrip` to each element.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        A string giving the set of characters to remove.  When
        omitted or None, whitespace is removed.  `chars` is not a
        suffix; rather, all combinations of its values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array(['aAaAaA', 'abBABba'],
        dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array(['aAaAaA', 'abBABb'],
        dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array(['aAaAa', 'abBABba'],
        dtype='|S7')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'rstrip', (chars,))

1493

1494

@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    Return, for each element of `a`, the list of words in the string,
    with `sep` acting as the delimiter string.

    Applies `str.split` to each element.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        When `sep` is omitted or None, any whitespace string is a
        separator.

    maxsplit : int, optional
        When given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.split, rsplit

    """
    # The per-element lists can differ in length, so the result is
    # kept as an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'split', call_args)

1528

1529

1530def _splitlines_dispatcher(a, keepends=None):

1531 return (a,)

1532

1533

@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    Return, for each element of `a`, the list of lines in the element,
    breaking at line boundaries.

    Applies `str.splitlines` to each element.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are kept in the resulting list only when
        `keepends` is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    """
    call_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', call_args)

1562

1563

1564def _startswith_dispatcher(a, prefix, start=None, end=None):

1565 return (a,)

1566

1567

@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element
    of `a` starts with `prefix` and `False` elsewhere.

    Applies `str.startswith` to each element.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With `start`, the test begins at that position.  With `end`,
        the comparison stops at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See Also
    --------
    str.startswith

    """
    call_args = [prefix, start] + _clean_args(end)
    return _vec_string(a, bool_, 'startswith', call_args)

1598

1599

@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    Return a copy of each element of `a` with its leading and trailing
    characters removed.

    Applies `str.strip` to each element.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        A string giving the set of characters to remove.  When
        omitted or None, whitespace is removed.  `chars` is not a
        prefix or suffix; rather, all combinations of its values
        are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')

    """
    strings = numpy.asarray(a)
    call_args = _clean_args(chars)
    return _vec_string(strings, strings.dtype, 'strip', call_args)

1643

1644

@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return, element-wise, a copy of the string in which uppercase
    characters become lowercase and vice versa.

    Applies `str.swapcase` to each element.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
        dtype='|S5')
    >>> np.char.swapcase(c)
    array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
        dtype='|S5')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'swapcase')

1681

1682

@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return, element-wise, the title-cased version of a string or
    unicode.

    In title case, words start with an uppercase character and all
    remaining cased characters are lowercase.

    Applies `str.title` to each element.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
        dtype='|S5')
    >>> np.char.title(c)
    array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
        dtype='|S5')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'title')

1721

1722

1723def _translate_dispatcher(a, table, deletechars=None):

1724 return (a,)

1725

1726

@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Return a copy of each element of `a` in which the characters
    listed in the optional `deletechars` are removed and the remaining
    characters are mapped through the given translation table.

    Applies `str.translate` to each element.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    """
    strings = numpy.asarray(a)
    if issubclass(strings.dtype.type, unicode_):
        # For unicode input only the table is forwarded; `deletechars`
        # is not passed on.
        call_args = (table,)
    else:
        call_args = [table] + _clean_args(deletechars)
    return _vec_string(strings, strings.dtype, 'translate', call_args)

1762

1763

@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Convert the elements of an array to uppercase.

    Applies `str.upper` to each element.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    strings = numpy.asarray(a)
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'upper')

1797

1798

1799def _zfill_dispatcher(a, width):

1800 return (a,)

1801

1802

@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Left-fill the numeric string with zeros.

    Applies `str.zfill` to each element.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.zfill

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    out_len = int(numpy.max(widths.flat))
    out_dtype = (strings.dtype.type, out_len)
    return _vec_string(strings, out_dtype, 'zfill', (widths,))

1832

1833

@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    Test, for each element, whether it consists only of numeric
    characters.

    Applies `unicode.isnumeric` to each element.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `a` is not made of Unicode strings.

    See Also
    --------
    unicode.isnumeric

    Examples
    --------
    >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
    array([ True, False, False, False, False])

    """
    if _use_unicode(a) == unicode_:
        return _vec_string(a, bool_, 'isnumeric')
    raise TypeError("isnumeric is only available for Unicode strings and arrays")

1869

1870

@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    For each element, return True if there are only decimal
    characters in the element.

    Calls `unicode.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `a` is not made of Unicode strings.

    See Also
    --------
    unicode.isdecimal

    Examples
    --------
    >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
    array([ True, False, False, False])

    """
    if _use_unicode(a) != unicode_:
        # The message must name this function, not `isnumeric`.
        raise TypeError("isdecimal is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isdecimal')

1906

1907

1908@set_module('numpy')

1909class chararray(ndarray):

1910 """

1911 chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,

1912 strides=None, order=None)

1913

1914 Provides a convenient view on arrays of string and unicode values.

1915

1916 .. note::

1917 The `chararray` class exists for backwards compatibility with

1918 Numarray, it is not recommended for new development. Starting from numpy

1919 1.4, if one needs arrays of strings, it is recommended to use arrays of

1920 `dtype` `object_`, `string_` or `unicode_`, and use the free functions

1921 in the `numpy.char` module for fast vectorized string operations.

1922

1923 Versus a regular NumPy array of type `str` or `unicode`, this

1924 class adds the following functionality:

1925

1926 1) values automatically have whitespace removed from the end

1927 when indexed

1928

1929 2) comparison operators automatically remove whitespace from the

1930 end when comparing values

1931

1932 3) vectorized string operations are provided as methods

1933 (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)

1934

1935 chararrays should be created using `numpy.char.array` or

1936 `numpy.char.asarray`, rather than this constructor directly.

1937

1938 This constructor creates the array, using `buffer` (with `offset`

1939 and `strides`) if it is not ``None``. If `buffer` is ``None``, then

1940 constructs a new array with `strides` in "C order", unless both

1941 ``len(shape) >= 2`` and ``order='F'``, in which case `strides`

1942 is in "Fortran order".

1943

1944 Methods

1945 -------

1946 astype

1947 argsort

1948 copy

1949 count

1950 decode

1951 dump

1952 dumps

1953 encode

1954 endswith

1955 expandtabs

1956 fill

1957 find

1958 flatten

1959 getfield

1960 index

1961 isalnum

1962 isalpha

1963 isdecimal

1964 isdigit

1965 islower

1966 isnumeric

1967 isspace

1968 istitle

1969 isupper

1970 item

1971 join

1972 ljust

1973 lower

1974 lstrip

1975 nonzero

1976 put

1977 ravel

1978 repeat

1979 replace

1980 reshape

1981 resize

1982 rfind

1983 rindex

1984 rjust

1985 rsplit

1986 rstrip

1987 searchsorted

1988 setfield

1989 setflags

1990 sort

1991 split

1992 splitlines

1993 squeeze

1994 startswith

1995 strip

1996 swapaxes

1997 swapcase

1998 take

1999 title

2000 tofile

2001 tolist

2002 tostring

2003 translate

2004 transpose

2005 upper

2006 view

2007 zfill

2008

2009 Parameters

2010 ----------

2011 shape : tuple

2012 Shape of the array.

2013 itemsize : int, optional

2014 Length of each array element, in number of characters. Default is 1.

2015 unicode : bool, optional

2016 Are the array elements of type unicode (True) or string (False).

2017 Default is False.

2018 buffer : object exposing the buffer interface or str, optional

2019 Memory address of the start of the array data. Default is None,

2020 in which case a new array is created.

2021 offset : int, optional

2022 Fixed stride displacement from the beginning of an axis?

2023 Default is 0. Needs to be >=0.

2024 strides : array_like of ints, optional

2025 Strides for the array (see `ndarray.strides` for full description).

2026 Default is None.

2027 order : {'C', 'F'}, optional

2028 The order in which the array data is stored in memory: 'C' ->

2029 "row major" order (the default), 'F' -> "column major"

2030 (Fortran) order.

2031

2032 Examples

2033 --------

2034 >>> charar = np.chararray((3, 3))

2035 >>> charar[:] = 'a'

2036 >>> charar

2037 chararray([[b'a', b'a', b'a'],

2038 [b'a', b'a', b'a'],

2039 [b'a', b'a', b'a']], dtype='|S1')

2040

2041 >>> charar = np.chararray(charar.shape, itemsize=5)

2042 >>> charar[:] = 'abc'

2043 >>> charar

2044 chararray([[b'abc', b'abc', b'abc'],

2045 [b'abc', b'abc', b'abc'],

2046 [b'abc', b'abc', b'abc']], dtype='|S5')

2047

2048 """

def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
            offset=0, strides=None, order='C'):
    """
    Allocate the underlying array, optionally on top of `buffer`.

    A `str` passed as `buffer` is not used as memory; it is assigned
    to every element of the freshly created array instead.
    """
    global _globalvar

    dtype = unicode_ if unicode else string_

    # force itemsize to be a Python int, since using NumPy integer
    # types results in itemsize.itemsize being used as the size of
    # strings in the new array.
    itemsize = int(itemsize)

    if isinstance(buffer, str):
        # unicode objects do not have the buffer interface
        filler = buffer
        buffer = None
    else:
        filler = None

    # While the flag is set, __array_finalize__ skips its dtype check.
    # Reset it in `finally` so that a failed construction cannot leave
    # the check disabled for every later chararray.
    _globalvar = 1
    try:
        if buffer is None:
            self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                   order=order)
        else:
            self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                   buffer=buffer,
                                   offset=offset, strides=strides,
                                   order=order)
        if filler is not None:
            self[...] = filler
    finally:
        _globalvar = 0
    return self

2083

def __array_finalize__(self, obj):
    """Reject non-string dtypes unless construction is in progress."""
    if _globalvar:
        return
    # The b is a special case because it is used for reconstructing.
    if self.dtype.char not in 'SUbc':
        raise ValueError("Can only create a chararray from string data.")

2088

2089 def __getitem__(self, obj):

2090 val = ndarray.__getitem__(self, obj)

2091

2092 if isinstance(val, character):

2093 temp = val.rstrip()

2094 if len(temp) == 0:

2095 val = ''

2096 else:

2097 val = temp

2098

2099 return val

2100

2101 # IMPLEMENTATION NOTE: Most of the methods of this class are

2102 # direct delegations to the free functions in this module.

2103 # However, those that return an array of strings should instead

2104 # return a chararray, so some extra wrapping is required.

2105

def __eq__(self, other):
    """
    Element-wise ``self == other``.

    See Also
    --------
    equal
    """
    result = equal(self, other)
    return result

2115

def __ne__(self, other):
    """
    Element-wise ``self != other``.

    See Also
    --------
    not_equal
    """
    result = not_equal(self, other)
    return result

2125

def __ge__(self, other):
    """
    Element-wise ``self >= other``.

    See Also
    --------
    greater_equal
    """
    result = greater_equal(self, other)
    return result

2135

def __le__(self, other):
    """
    Element-wise ``self <= other``.

    See Also
    --------
    less_equal
    """
    result = less_equal(self, other)
    return result

2145

def __gt__(self, other):
    """
    Element-wise ``self > other``.

    See Also
    --------
    greater
    """
    result = greater(self, other)
    return result

2155

def __lt__(self, other):
    """
    Element-wise ``self < other``.

    See Also
    --------
    less
    """
    result = less(self, other)
    return result

2165

def __add__(self, other):
    """
    Element-wise string concatenation ``self + other`` for a pair of
    array_likes of str or unicode.

    See Also
    --------
    add
    """
    joined = add(self, other)
    return asarray(joined)

2176

def __radd__(self, other):
    """
    Element-wise string concatenation ``other + self`` for a pair of
    array_likes of `string_` or `unicode_`.

    See Also
    --------
    add
    """
    left = numpy.asarray(other)
    joined = add(left, self)
    return asarray(joined)

2187

def __mul__(self, i):
    """
    Element-wise multiple concatenation ``self * i``.

    See Also
    --------
    multiply
    """
    repeated = multiply(self, i)
    return asarray(repeated)

2198

def __rmul__(self, i):
    """
    Element-wise multiple concatenation for ``i * self``; computed
    as ``multiply(self, i)``.

    See Also
    --------
    multiply
    """
    repeated = multiply(self, i)
    return asarray(repeated)

2209

def __mod__(self, i):
    """
    Element-wise ``self % i``, i.e. pre-Python 2.6 string formatting
    (interpolation), for a pair of array_likes of `string_` or
    `unicode_`.

    See Also
    --------
    mod
    """
    formatted = mod(self, i)
    return asarray(formatted)

2221

2222 def __rmod__(self, other):

2223 return NotImplemented

2224

def argsort(self, axis=-1, kind=None, order=None):
    """
    Return the indices that would sort the array lexicographically.

    This is a thin wrapper; see `numpy.argsort` for the full
    documentation.

    Examples
    --------
    >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
    >>> c = c.view(np.chararray); c
    chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
          dtype='|S5')
    >>> c[c.argsort()]
    chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
          dtype='|S5')

    """
    base = self.__array__()
    return base.argsort(axis, kind, order)

2244 argsort.__doc__ = ndarray.argsort.__doc__

2245

def capitalize(self):
    """
    Copy of `self` in which only the first character of each element
    is capitalized.

    See Also
    --------
    char.capitalize

    """
    capitalized = capitalize(self)
    return asarray(capitalized)

2257

def center(self, width, fillchar=' '):
    """
    Copy of `self` with every element centered in a string of
    length `width`.

    See Also
    --------
    center
    """
    centered = center(self, width, fillchar)
    return asarray(centered)

2268

def count(self, sub, start=0, end=None):
    """
    Array holding the number of non-overlapping occurrences of the
    substring `sub` in the range [`start`, `end`].

    See Also
    --------
    char.count

    """
    occurrences = count(self, sub, start, end)
    return occurrences

2280

def decode(self, encoding=None, errors=None):
    """
    Apply ``bytes.decode`` to each element.

    See Also
    --------
    char.decode

    """
    decoded = decode(self, encoding, errors)
    return decoded

2291

def encode(self, encoding=None, errors=None):
    """
    Apply `str.encode` to each element.

    See Also
    --------
    char.encode

    """
    encoded = encode(self, encoding, errors)
    return encoded

2302

def endswith(self, suffix, start=0, end=None):
    """
    Boolean array that is `True` where the string element of `self`
    ends with `suffix` and `False` elsewhere.

    See Also
    --------
    char.endswith

    """
    matches = endswith(self, suffix, start, end)
    return matches

2314

def expandtabs(self, tabsize=8):
    """
    Copy of each string element in which every tab character is
    replaced by one or more spaces.

    See Also
    --------
    char.expandtabs

    """
    expanded = expandtabs(self, tabsize)
    return asarray(expanded)

2326

def find(self, sub, start=0, end=None):
    """
    Lowest index, per element, at which the substring `sub` is found.

    See Also
    --------
    char.find

    """
    positions = find(self, sub, start, end)
    return positions

2338

def index(self, sub, start=0, end=None):
    """
    Same as `find`, except that a `ValueError` is raised when the
    substring is not found.

    See Also
    --------
    char.index

    """
    positions = index(self, sub, start, end)
    return positions

2349

def isalnum(self):
    """
    True for each element whose characters are all alphanumeric and
    which has at least one character; false otherwise.

    See Also
    --------
    char.isalnum

    """
    flags = isalnum(self)
    return flags

2362

def isalpha(self):
    """
    True for each element whose characters are all alphabetic and
    which has at least one character; false otherwise.

    See Also
    --------
    char.isalpha

    """
    flags = isalpha(self)
    return flags

2375

def isdigit(self):
    """
    True for each element whose characters are all digits and which
    has at least one character; false otherwise.

    See Also
    --------
    char.isdigit

    """
    flags = isdigit(self)
    return flags

2387

def islower(self):
    """
    True for each element whose cased characters are all lowercase
    and which has at least one cased character; false otherwise.

    See Also
    --------
    char.islower

    """
    flags = islower(self)
    return flags

2400

def isspace(self):
    """
    True for each element that contains only whitespace characters
    and has at least one character; false otherwise.

    See Also
    --------
    char.isspace

    """
    flags = isspace(self)
    return flags

2413

def istitle(self):
    """
    True for each element that is a titlecased string with at least
    one character; false otherwise.

    See Also
    --------
    char.istitle

    """
    flags = istitle(self)
    return flags

2425

def isupper(self):
    """
    True for each element whose cased characters are all uppercase
    and which has at least one character; false otherwise.

    See Also
    --------
    char.isupper

    """
    flags = isupper(self)
    return flags

2438

def join(self, seq):
    """
    String formed by concatenating the strings in the sequence `seq`.

    See Also
    --------
    char.join

    """
    joined = join(self, seq)
    return joined

2450

def ljust(self, width, fillchar=' '):
    """
    Array with the elements of `self` left-justified in a string of
    length `width`.

    See Also
    --------
    char.ljust

    """
    justified = ljust(self, width, fillchar)
    return asarray(justified)

2462

def lower(self):
    """
    Convert every element of `self` to lowercase.

    The result of the module-level `lower` function is converted with
    `asarray` before being returned.

    See Also
    --------
    char.lower

    """
    lowered = lower(self)
    return asarray(lowered)

2474

def lstrip(self, chars=None):
    """
    Return a copy of every element of `self` with its leading
    characters removed.

    `chars` is forwarded unchanged to the module-level `lstrip`
    function; the result is converted with `asarray` before being
    returned.

    See Also
    --------
    char.lstrip

    """
    stripped = lstrip(self, chars)
    return asarray(stripped)

2486

def partition(self, sep):
    """
    Partition every element of `self` around `sep`.

    The result of the module-level `partition` function is converted
    with `asarray` before being returned.

    See Also
    --------
    char.partition

    """
    pieces = partition(self, sep)
    return asarray(pieces)

2496

def replace(self, old, new, count=None):
    """
    Return a copy of every element of `self` in which occurrences of
    the substring `old` have been replaced by `new`.

    `count` is forwarded unchanged to the module-level `replace`
    function; the result is converted with `asarray` before being
    returned.

    See Also
    --------
    char.replace

    """
    substituted = replace(self, old, new, count)
    return asarray(substituted)

2508

def rfind(self, sub, start=0, end=None):
    """
    Find, for every element of `self`, the highest index at which the
    substring `sub` occurs within [`start`, `end`].

    Delegates to the module-level `rfind` function and returns its
    result unchanged.

    See Also
    --------
    char.rfind

    """
    positions = rfind(self, sub, start, end)
    return positions

2521

def rindex(self, sub, start=0, end=None):
    """
    Behave like `rfind`, except that `ValueError` is raised when the
    substring `sub` is not found.

    Delegates to the module-level `rindex` function and returns its
    result unchanged.

    See Also
    --------
    char.rindex

    """
    positions = rindex(self, sub, start, end)
    return positions

2533

def rjust(self, width, fillchar=' '):
    """
    Right-justify every element of `self` in a string of length
    `width`.

    `fillchar` is forwarded unchanged to the module-level `rjust`
    function; the result is converted with `asarray` before being
    returned.

    See Also
    --------
    char.rjust

    """
    padded = rjust(self, width, fillchar)
    return asarray(padded)

2545

def rpartition(self, sep):
    """
    Partition every element of `self` around `sep`.

    The result of the module-level `rpartition` function is converted
    with `asarray` before being returned.

    See Also
    --------
    char.rpartition

    """
    pieces = rpartition(self, sep)
    return asarray(pieces)

2555

def rsplit(self, sep=None, maxsplit=None):
    """
    Return, for every element of `self`, a list of the words in the
    string, with `sep` used as the delimiter string.

    `maxsplit` is forwarded unchanged to the module-level `rsplit`
    function, whose result is returned as-is.

    See Also
    --------
    char.rsplit

    """
    words = rsplit(self, sep, maxsplit)
    return words

2567

def rstrip(self, chars=None):
    """
    Return a copy of every element of `self` with its trailing
    characters removed.

    `chars` is forwarded unchanged to the module-level `rstrip`
    function; the result is converted with `asarray` before being
    returned.

    See Also
    --------
    char.rstrip

    """
    stripped = rstrip(self, chars)
    return asarray(stripped)

2579

def split(self, sep=None, maxsplit=None):
    """
    Return, for every element of `self`, a list of the words in the
    string, with `sep` used as the delimiter string.

    `maxsplit` is forwarded unchanged to the module-level `split`
    function, whose result is returned as-is.

    See Also
    --------
    char.split

    """
    words = split(self, sep, maxsplit)
    return words

2591

def splitlines(self, keepends=None):
    """
    Return, for every element of `self`, a list of the lines in the
    element, broken at line boundaries.

    `keepends` is forwarded unchanged to the module-level `splitlines`
    function, whose result is returned as-is.

    See Also
    --------
    char.splitlines

    """
    lines = splitlines(self, keepends)
    return lines

2603

def startswith(self, prefix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element of
    `self` starts with `prefix` and `False` elsewhere.

    `start` and `end` are forwarded unchanged to the module-level
    `startswith` function, whose result is returned as-is.

    See Also
    --------
    char.startswith

    """
    prefix_mask = startswith(self, prefix, start, end)
    return prefix_mask

2615

def strip(self, chars=None):
    """
    Return a copy of every element of `self` with its leading and
    trailing characters removed.

    `chars` is forwarded unchanged to the module-level `strip`
    function; the result is converted with `asarray` before being
    returned.

    See Also
    --------
    char.strip

    """
    stripped = strip(self, chars)
    return asarray(stripped)

2627

def swapcase(self):
    """
    Return a copy of every element of `self` with uppercase characters
    turned into lowercase and lowercase characters into uppercase.

    The result of the module-level `swapcase` function is converted
    with `asarray` before being returned.

    See Also
    --------
    char.swapcase

    """
    swapped = swapcase(self)
    return asarray(swapped)

2639

def title(self):
    """
    Return a titlecased version of every element of `self`.

    In a titlecased string each word starts with an uppercase character
    and all remaining cased characters are lowercase. The result of the
    module-level `title` function is converted with `asarray` before
    being returned.

    See Also
    --------
    char.title

    """
    titled = title(self)
    return asarray(titled)

2652

def translate(self, table, deletechars=None):
    """
    Return a copy of every element of `self` mapped through a
    translation table.

    Characters that occur in the optional argument `deletechars` are
    removed, and the characters that remain are mapped through `table`.
    The result of the module-level `translate` function is converted
    with `asarray` before being returned.

    See Also
    --------
    char.translate

    """
    translated = translate(self, table, deletechars)
    return asarray(translated)

2666

def upper(self):
    """
    Convert every element of `self` to uppercase.

    The result of the module-level `upper` function is converted with
    `asarray` before being returned.

    See Also
    --------
    char.upper

    """
    uppered = upper(self)
    return asarray(uppered)

2678

def zfill(self, width):
    """
    Left-fill the numeric string with zeros so that it occupies a
    string of length `width`.

    The result of the module-level `zfill` function is converted with
    `asarray` before being returned.

    See Also
    --------
    char.zfill

    """
    zero_padded = zfill(self, width)
    return asarray(zero_padded)

2690

def isnumeric(self):
    """
    Test, element by element, whether the element contains only
    numeric characters.

    Delegates to the module-level `isnumeric` function and returns its
    result unchanged.

    See Also
    --------
    char.isnumeric

    """
    numeric_mask = isnumeric(self)
    return numeric_mask

2702

def isdecimal(self):
    """
    Test, element by element, whether the element contains only
    decimal characters.

    Delegates to the module-level `isdecimal` function and returns its
    result unchanged.

    See Also
    --------
    char.isdecimal

    """
    decimal_mask = isdecimal(self)
    return decimal_mask

2714

2715

@set_module("numpy.char")
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
    """
    Create a `chararray`.

    .. note::
       This class is provided for numarray backward-compatibility.
       New code (not concerned with numarray compatibility) should use
       arrays of type `string_` or `unicode_` and use the free functions
       in :mod:`numpy.char <numpy.core.defchararray>` for fast
       vectorized string operations instead.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy
        will only be made if __array__ returns a copy, if obj is a
        nested sequence, or if a copy is needed to satisfy any of the other
        requirements (`itemsize`, unicode, `order`, etc.).

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F', 'A'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).  If order is 'A', then the returned array may
        be in any order (either C-, Fortran-contiguous, or even
        discontiguous).

    Returns
    -------
    out : chararray
        A `chararray` built from `obj`.
    """
    if isinstance(obj, (bytes, str)):
        if unicode is None:
            unicode = isinstance(obj, str)

        if itemsize is None:
            itemsize = len(obj)
        # NOTE(review): an empty `obj` with no explicit `itemsize` makes
        # this a division by zero; behavior left unchanged here.
        shape = len(obj) // itemsize

        return chararray(shape, itemsize=itemsize, unicode=unicode,
                         buffer=obj, order=order)

    if isinstance(obj, (list, tuple)):
        obj = numpy.asarray(obj)

    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
        # If we just have a vanilla chararray, create a chararray
        # view around it.
        if not isinstance(obj, chararray):
            obj = obj.view(chararray)

        obj_is_unicode = issubclass(obj.dtype.type, unicode_)
        # ndarray.itemsize is in 8-bit chars, so for Unicode we divide by
        # the size of a single Unicode character, which for NumPy is
        # always 4.  Working in characters keeps the comparison below in
        # the same unit as the `itemsize` argument.
        obj_chars = obj.itemsize // 4 if obj_is_unicode else obj.itemsize

        if itemsize is None:
            itemsize = obj_chars

        if unicode is None:
            unicode = obj_is_unicode

        dtype = unicode_ if unicode else string_

        if order is not None:
            # asanyarray (unlike asarray) passes subclasses through, so
            # the chararray view created above survives the reordering.
            obj = numpy.asanyarray(obj, order=order)

        # Convert only when something actually differs.  The kind check
        # looks at the array's dtype: `obj` is an ndarray, so testing it
        # with isinstance against the scalar types can never succeed.
        needs_conversion = (
            copy or
            itemsize != obj_chars or
            bool(unicode) != obj_is_unicode)
        if needs_conversion:
            obj = obj.astype((dtype, int(itemsize)))
        return obj

    # NOTE(review): `object` here is presumably the Python builtin rather
    # than `object_`, which makes this test true for every ndarray that
    # reaches this point.  Left as-is because narrowing it could change
    # the itemsize inferred for non-object input -- confirm before
    # tightening.
    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
        if itemsize is None:
            # Since no itemsize was specified, convert the input array to
            # a list so the ndarray constructor will automatically
            # determine the itemsize for us.
            obj = obj.tolist()
            # Fall through to the default case

    dtype = unicode_ if unicode else string_

    if itemsize is None:
        val = narray(obj, dtype=dtype, order=order, subok=True)
    else:
        val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
    return val.view(chararray)

2848

2849

@set_module("numpy.char")
def asarray(obj, itemsize=None, unicode=None, order=None):
    """
    Convert the input to a `chararray`, copying the data only if
    necessary.

    This is a thin wrapper that calls `array` with ``copy=False`` and
    forwards every other argument unchanged.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators
         (e.g. ``+``, ``*``, ``%``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).
    """
    converted = array(
        obj, itemsize=itemsize, copy=False, unicode=unicode, order=order)
    return converted