Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/string_ops.py: 66% (167 statements)
coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# -*- coding: utf-8 -*-
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Operations for working with string Tensors."""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_parsing_ops
from tensorflow.python.ops import gen_string_ops
from tensorflow.python.ops import math_ops

# go/tf-wildcard-import
# pylint: disable=wildcard-import
# pylint: disable=g-bad-import-order
from tensorflow.python.ops.gen_string_ops import *
from tensorflow.python.util import compat as util_compat
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export
# pylint: enable=g-bad-import-order
# pylint: enable=wildcard-import


# pylint: disable=redefined-builtin
@tf_export("strings.regex_full_match")
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def regex_full_match(input, pattern, name=None):
  r"""Match elements of `input` with regex `pattern`.

  Args:
    input: string `Tensor`, the source strings to process.
    pattern: string or scalar string `Tensor`, regular expression to use,
      see more details at https://github.com/google/re2/wiki/Syntax
    name: Name of the op.

  Returns:
    bool `Tensor` of the same shape as `input` with match results.
  """
  if isinstance(pattern, util_compat.bytes_or_text_types):
    # When `pattern` is static through the life of the op we can
    # use a version which performs the expensive regex compilation once at
    # creation time.
    return gen_string_ops.static_regex_full_match(
        input=input, pattern=pattern, name=name)
  return gen_string_ops.regex_full_match(
      input=input, pattern=pattern, name=name)

regex_full_match.__doc__ = gen_string_ops.regex_full_match.__doc__
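
# Example (illustrative sketch, assuming eager execution; the expected value
# follows from RE2 full-match semantics, where the pattern must match the
# entire string):
#   tf.strings.regex_full_match(["TensorFlow", "flow"], pattern="Tensor.*")
#   => [True, False]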


@tf_export(
    "strings.regex_replace", v1=["strings.regex_replace", "regex_replace"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("regex_replace")
def regex_replace(input, pattern, rewrite, replace_global=True, name=None):
  r"""Replace elements of `input` matching regex `pattern` with `rewrite`.

  >>> tf.strings.regex_replace("Text with tags.<br /><b>contains html</b>",
  ...                          "<[^>]+>", " ")
  <tf.Tensor: shape=(), dtype=string, numpy=b'Text with tags. contains html '>

  Args:
    input: string `Tensor`, the source strings to process.
    pattern: string or scalar string `Tensor`, regular expression to use,
      see more details at https://github.com/google/re2/wiki/Syntax
    rewrite: string or scalar string `Tensor`, value to use in match
      replacement. Supports backslash-escaped digits (\1 to \9), which can be
      used to insert the text matching the corresponding parenthesized group.
    replace_global: `bool`, if `True` replace all non-overlapping matches,
      else replace only the first match.
    name: A name for the operation (optional).

  Returns:
    string `Tensor` of the same shape as `input` with specified replacements.
  """
  if (isinstance(pattern, util_compat.bytes_or_text_types) and
      isinstance(rewrite, util_compat.bytes_or_text_types)):
    # When `pattern` and `rewrite` are static through the life of the op we can
    # use a version which performs the expensive regex compilation once at
    # creation time.
    return gen_string_ops.static_regex_replace(
        input=input, pattern=pattern,
        rewrite=rewrite, replace_global=replace_global,
        name=name)
  return gen_string_ops.regex_replace(
      input=input, pattern=pattern,
      rewrite=rewrite, replace_global=replace_global,
      name=name)
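
# Example (illustrative sketch, eager mode): with `replace_global=False` only
# the first match is rewritten. The expected value follows from RE2 replace
# semantics rather than from captured output:
#   tf.strings.regex_replace("one one one", "one", "1", replace_global=False)
#   => b'1 one one'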


@tf_export("strings.format")
@dispatch.add_dispatch_support
def string_format(template, inputs, placeholder="{}", summarize=3, name=None):
  r"""Formats a string template using a list of tensors.

  Formats a string template using a list of tensors, abbreviating tensors by
  only printing the first and last `summarize` elements of each dimension
  (recursively). If formatting only one tensor into a template, the tensor does
  not have to be wrapped in a list.

  Example:
    Formatting a single-tensor template:

    >>> tensor = tf.range(5)
    >>> tf.strings.format("tensor: {}, suffix", tensor)
    <tf.Tensor: shape=(), dtype=string, numpy=b'tensor: [0 1 2 3 4], suffix'>

    Formatting a multi-tensor template:

    >>> tensor_a = tf.range(2)
    >>> tensor_b = tf.range(1, 4, 2)
    >>> tf.strings.format("a: {}, b: {}, suffix", (tensor_a, tensor_b))
    <tf.Tensor: shape=(), dtype=string, numpy=b'a: [0 1], b: [1 3], suffix'>

  Args:
    template: A string template to format tensor values into.
    inputs: A list of `Tensor` objects, or a single Tensor.
      The list of tensors to format into the template string. If a solitary
      tensor is passed in, the input tensor will automatically be wrapped as a
      list.
    placeholder: An optional `string`. Defaults to `{}`.
      At each placeholder occurring in the template, a subsequent tensor
      will be inserted.
    summarize: An optional `int`. Defaults to `3`.
      When formatting the tensors, show the first and last `summarize`
      entries of each tensor dimension (recursively). If set to -1, all
      elements of the tensor will be shown.
    name: A name for the operation (optional).

  Returns:
    A scalar `Tensor` of type `string`.

  Raises:
    ValueError: if the number of placeholders does not match the number of
      inputs.
  """
  # If there is only one tensor to format, we will automatically wrap it in a
  # list to simplify the user experience.
  if tensor_util.is_tf_type(inputs):
    inputs = [inputs]
  if template.count(placeholder) != len(inputs):
    raise ValueError(f"The template expects {template.count(placeholder)} "
                     f"tensors, but {len(inputs)} were supplied. Please "
                     "ensure the number of placeholders in the template "
                     "matches the length of `inputs`.")

  return gen_string_ops.string_format(inputs,
                                      template=template,
                                      placeholder=placeholder,
                                      summarize=summarize,
                                      name=name)


# Note: tf.strings.split is exported in ragged/ragged_string_ops.py, which
# defines a wrapper for this function.
def string_split(source, sep=None, skip_empty=True, delimiter=None):  # pylint: disable=invalid-name
  """Split elements of `source` based on `sep` into a `SparseTensor`.

  Let N be the size of `source` (typically N will be the batch size). Split
  each element of `source` based on `sep` and return a `SparseTensor`
  containing the split tokens. Empty tokens are ignored.

  If `sep` is an empty string, each element of `source` is split into
  individual strings, each containing one byte. (This includes splitting
  multibyte sequences of UTF-8.) If `sep` contains multiple bytes, it is
  treated as a set of delimiters, with each byte considered a potential split
  point.

  For example:
  N = 2, source[0] is 'hello world' and source[1] is 'a b c', then the output
  will be

  st.indices = [0, 0;
                0, 1;
                1, 0;
                1, 1;
                1, 2]
  st.shape = [2, 3]
  st.values = ['hello', 'world', 'a', 'b', 'c']

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    sep: `0-D` string `Tensor`, the delimiter string. Defaults to ' '.
    skip_empty: A `bool`. If `True`, skip the empty strings from the result.
    delimiter: deprecated alias for `sep`.

  Raises:
    ValueError: If delimiter is not a string.

  Returns:
    A `SparseTensor` of rank `2`, the strings split according to the delimiter.
    The first column of the indices corresponds to the row in `source` and the
    second column corresponds to the index of the split component in this row.
  """
  delimiter = deprecation.deprecated_argument_lookup(
      "sep", sep, "delimiter", delimiter)

  if delimiter is None:
    delimiter = " "
  delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string)
  source = ops.convert_to_tensor(source, dtype=dtypes.string)

  indices, values, shape = gen_string_ops.string_split(
      source, delimiter=delimiter, skip_empty=skip_empty)
  indices.set_shape([None, 2])
  values.set_shape([None])
  shape.set_shape([2])
  return sparse_tensor.SparseTensor(indices, values, shape)
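
# Example (illustrative sketch, eager mode; the expected values mirror the
# 'hello world' / 'a b c' case in the docstring above):
#   st = string_split(tf.constant(["hello world", "a b c"]))
#   st.values  => [b'hello', b'world', b'a', b'b', b'c']
#   st.indices => [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]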


# Note: tf.strings.split is exported in ragged/ragged_string_ops.py, which
# defines a wrapper for this function.
def string_split_v2(source, sep=None, maxsplit=-1):
  """Split elements of `source` based on `sep` into a `SparseTensor`.

  Let N be the size of `source` (typically N will be the batch size). Split
  each element of `source` based on `sep` and return a `SparseTensor`
  containing the split tokens. Empty tokens are ignored.

  For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
  then the output will be

  st.indices = [0, 0;
                0, 1;
                1, 0;
                1, 1;
                1, 2]
  st.shape = [2, 3]
  st.values = ['hello', 'world', 'a', 'b', 'c']

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, a source of `"1<>2<><>3"` and
  a sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, consecutive whitespace characters are regarded as a single
  separator, and the result will contain no empty strings at the start or end
  if the string has leading or trailing whitespace.

  Note that the above-mentioned behavior matches Python's str.split.

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    sep: `0-D` string `Tensor`, the delimiter string.
    maxsplit: An `int`. If `maxsplit > 0`, at most `maxsplit` splits are
      performed.

  Raises:
    ValueError: If sep is not a string.

  Returns:
    A `SparseTensor` of rank `2`, the strings split according to the delimiter.
    The first column of the indices corresponds to the row in `source` and the
    second column corresponds to the index of the split component in this row.
  """
  if sep is None:
    sep = ""
  sep = ops.convert_to_tensor(sep, dtype=dtypes.string)
  source = ops.convert_to_tensor(source, dtype=dtypes.string)

  indices, values, shape = gen_string_ops.string_split_v2(
      source, sep=sep, maxsplit=maxsplit)
  indices.set_shape([None, 2])
  values.set_shape([None])
  shape.set_shape([2])
  return sparse_tensor.SparseTensor(indices, values, shape)
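
# Example (illustrative sketch, eager mode; the expected values mirror the
# "1<>2<><>3" case in the docstring above):
#   st = string_split_v2(tf.constant(["1<>2<><>3"]), sep="<>")
#   st.values => [b'1', b'2', b'', b'3']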


def _reduce_join_reduction_dims(x, axis):
  """Returns range(rank(x) - 1, -1, -1) if axis is None; or axis otherwise."""
  if axis is not None:
    return axis
  else:
    # Fast path: avoid creating Rank and Range ops if ndims is known.
    if x.get_shape().ndims is not None:
      return constant_op.constant(
          np.arange(x.get_shape().ndims - 1, -1, -1), dtype=dtypes.int32)

    # Otherwise, we rely on Range and Rank to do the right thing at run-time.
    return math_ops.range(array_ops.rank(x) - 1, -1, -1)
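
# Example (illustrative): for a rank-3 input with a statically known shape and
# axis=None, this helper returns [2, 1, 0], i.e. all axes in reverse order.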


@tf_export(v1=["strings.reduce_join", "reduce_join"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "keep_dims is deprecated, use keepdims instead",
                             "keep_dims")
@deprecation.deprecated_endpoints("reduce_join")
def reduce_join(inputs, axis=None,  # pylint: disable=missing-docstring
                keep_dims=None,
                separator="",
                name=None,
                reduction_indices=None,
                keepdims=None):
  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
                                                    "keep_dims", keep_dims)
  if keep_dims is None:
    keep_dims = False
  axis = deprecation.deprecated_argument_lookup("axis", axis,
                                                "reduction_indices",
                                                reduction_indices)
  return reduce_join_v2(
      inputs=inputs,
      axis=axis,
      keepdims=keepdims,
      separator=separator,
      name=name)


@tf_export("strings.reduce_join", v1=[])
@dispatch.add_dispatch_support
def reduce_join_v2(  # pylint: disable=missing-docstring
    inputs,
    axis=None,
    keepdims=False,
    separator="",
    name=None):
  """Joins all strings into a single string, or joins along an axis.

  This is the reduction operation for the elementwise `tf.strings.join` op.

  >>> tf.strings.reduce_join([['abc','123'],
  ...                         ['def','456']]).numpy()
  b'abc123def456'
  >>> tf.strings.reduce_join([['abc','123'],
  ...                         ['def','456']], axis=-1).numpy()
  array([b'abc123', b'def456'], dtype=object)
  >>> tf.strings.reduce_join([['abc','123'],
  ...                         ['def','456']],
  ...                        axis=-1,
  ...                        separator=" ").numpy()
  array([b'abc 123', b'def 456'], dtype=object)

  Args:
    inputs: A `tf.string` tensor.
    axis: Which axis to join along. The default behavior is to join all
      elements, producing a scalar.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: a string added between each string being joined.
    name: A name for the operation (optional).

  Returns:
    A `tf.string` tensor.
  """
  with ops.name_scope(None, "ReduceJoin", [inputs, axis]):
    inputs_t = ops.convert_to_tensor(inputs)
    axis = _reduce_join_reduction_dims(inputs_t, axis)
    return gen_string_ops.reduce_join(
        inputs=inputs_t,
        reduction_indices=axis,
        keep_dims=keepdims,
        separator=separator,
        name=name)

reduce_join.__doc__ = reduce_join_v2.__doc__


# This wrapper provides backwards compatibility for code that predates the
# unit argument and that passed 'name' as a positional argument.
@tf_export(v1=["strings.length"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_length(input, name=None, unit="BYTE"):
  """Computes the length of each string given in the input tensor.

  >>> strings = tf.constant(['Hello','TensorFlow', '🙂'])
  >>> tf.strings.length(strings).numpy()  # default counts bytes
  array([ 5, 10,  4], dtype=int32)
  >>> tf.strings.length(strings, unit="UTF8_CHAR").numpy()
  array([ 5, 10,  1], dtype=int32)

  Args:
    input: A `Tensor` of type `string`. The strings whose lengths are
      computed, element-wise.
    name: A name for the operation (optional).
    unit: An optional `string` from: `"BYTE", "UTF8_CHAR"`. Defaults to
      `"BYTE"`. The unit that is counted to compute string length. One of:
      `"BYTE"` (for the number of bytes in each string) or `"UTF8_CHAR"` (for
      the number of UTF-8 encoded Unicode code points in each string). Results
      are undefined if `unit=UTF8_CHAR` and the `input` strings do not contain
      structurally valid UTF-8.

  Returns:
    A `Tensor` of type `int32`, containing the length of the input string in
    the same element of the input tensor.
  """
  return gen_string_ops.string_length(input, unit=unit, name=name)


@tf_export("strings.length", v1=[])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_length_v2(input, unit="BYTE", name=None):
  return gen_string_ops.string_length(input, unit=unit, name=name)


string_length_v2.__doc__ = gen_string_ops.string_length.__doc__
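
# Example (illustrative sketch, eager mode): the v2 endpoint is the same op
# with `unit` before `name` in the signature.
#   tf.strings.length(["hi", "🙂"], unit="UTF8_CHAR")  => [2, 1]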


@tf_export(v1=["substr"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
@deprecation.deprecated(None, "Use `tf.strings.substr` instead of `tf.substr`.")
def substr_deprecated(input, pos, len, name=None, unit="BYTE"):
  return substr(input, pos, len, name=name, unit=unit)

substr_deprecated.__doc__ = gen_string_ops.substr.__doc__


@tf_export(v1=["strings.substr"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def substr(input, pos, len, name=None, unit="BYTE"):
  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)

substr.__doc__ = gen_string_ops.substr.__doc__


@tf_export("strings.substr", v1=[])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def substr_v2(input, pos, len, unit="BYTE", name=None):
  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)

substr_v2.__doc__ = gen_string_ops.substr.__doc__
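
# Example (illustrative sketch, eager mode; the expected value follows from
# 0-based substring semantics with the default BYTE unit):
#   tf.strings.substr("hello world", pos=6, len=5)  => b'world'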


ops.NotDifferentiable("RegexReplace")
ops.NotDifferentiable("StringToHashBucket")
ops.NotDifferentiable("StringToHashBucketFast")
ops.NotDifferentiable("StringToHashBucketStrong")
ops.NotDifferentiable("ReduceJoin")
ops.NotDifferentiable("StringJoin")
ops.NotDifferentiable("StringSplit")
ops.NotDifferentiable("AsString")
ops.NotDifferentiable("EncodeBase64")
ops.NotDifferentiable("DecodeBase64")


@tf_export("strings.to_number", v1=[])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_to_number(input, out_type=dtypes.float32, name=None):
  r"""Converts each string in the input Tensor to the specified numeric type.

  (Note that int32 overflow results in an error while float overflow
  results in a rounded value.)

  Examples:

  >>> tf.strings.to_number("1.55")
  <tf.Tensor: shape=(), dtype=float32, numpy=1.55>
  >>> tf.strings.to_number("3", tf.int32)
  <tf.Tensor: shape=(), dtype=int32, numpy=3>

  Args:
    input: A `Tensor` of type `string`.
    out_type: An optional `tf.DType` from: `tf.float32, tf.float64, tf.int32,
      tf.int64`. Defaults to `tf.float32`.
      The numeric type to interpret each string in `input` as.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `out_type`.
  """
  return gen_parsing_ops.string_to_number(input, out_type, name)


@tf_export(v1=["strings.to_number", "string_to_number"])
@dispatch.add_dispatch_support
def string_to_number_v1(
    string_tensor=None,
    out_type=dtypes.float32,
    name=None,
    input=None):
  string_tensor = deprecation.deprecated_argument_lookup(
      "input", input, "string_tensor", string_tensor)
  return gen_parsing_ops.string_to_number(string_tensor, out_type, name)

string_to_number_v1.__doc__ = gen_parsing_ops.string_to_number.__doc__


@tf_export("strings.to_hash_bucket", v1=[])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_to_hash_bucket(input, num_buckets, name=None):
  # pylint: disable=line-too-long
  r"""Converts each string in the input Tensor to its hash modulo the number of buckets.

  The hash function is deterministic on the content of the string within the
  process.

  Note that the hash function may change from time to time. This
  functionality will be deprecated; prefer `tf.strings.to_hash_bucket_fast()`
  or `tf.strings.to_hash_bucket_strong()`.

  Examples:

  >>> tf.strings.to_hash_bucket(["Hello", "TensorFlow", "2.x"], 3)
  <tf.Tensor: shape=(3,), dtype=int64, numpy=array([2, 0, 1])>

  Args:
    input: A `Tensor` of type `string`.
    num_buckets: An `int` that is `>= 1`. The number of buckets.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `int64`.
  """
  # pylint: enable=line-too-long
  return gen_string_ops.string_to_hash_bucket(input, num_buckets, name)
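
# Sketch of the recommended alternatives (illustrative; bucket values depend
# on the hash implementation, so no outputs are shown):
#   tf.strings.to_hash_bucket_fast(["Hello"], num_buckets=10)
#   tf.strings.to_hash_bucket_strong(["Hello"], num_buckets=10, key=[1, 2])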


@tf_export(v1=["strings.to_hash_bucket", "string_to_hash_bucket"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_to_hash_bucket_v1(  # pylint: disable=missing-function-docstring
    string_tensor=None,
    num_buckets=None,
    name=None,
    input=None):
  string_tensor = deprecation.deprecated_argument_lookup(
      "input", input, "string_tensor", string_tensor)
  return gen_string_ops.string_to_hash_bucket(string_tensor, num_buckets, name)

string_to_hash_bucket_v1.__doc__ = gen_string_ops.string_to_hash_bucket.__doc__


@tf_export("strings.join", v1=["strings.join", "string_join"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("string_join")
def string_join(inputs, separator="", name=None):
  """Perform element-wise concatenation of a list of string tensors.

  Given a list of string tensors of the same shape, performs element-wise
  concatenation of the strings with the same index in all tensors.

  >>> tf.strings.join(['abc','def']).numpy()
  b'abcdef'
  >>> tf.strings.join([['abc','123'],
  ...                  ['def','456'],
  ...                  ['ghi','789']]).numpy()
  array([b'abcdefghi', b'123456789'], dtype=object)
  >>> tf.strings.join([['abc','123'],
  ...                  ['def','456']],
  ...                 separator=" ").numpy()
  array([b'abc def', b'123 456'], dtype=object)

  The reduction version of this elementwise operation is
  `tf.strings.reduce_join`.

  Args:
    inputs: A list of `tf.Tensor` objects of the same shape and `tf.string`
      dtype.
    separator: A string added between each string being joined.
    name: A name for the operation (optional).

  Returns:
    A `tf.string` tensor.
  """
  return gen_string_ops.string_join(inputs, separator=separator, name=name)


@tf_export("strings.unsorted_segment_join")
@dispatch.add_dispatch_support
def unsorted_segment_join(inputs,
                          segment_ids,
                          num_segments,
                          separator="",
                          name=None):
  """Joins the elements of `inputs` based on `segment_ids`.

  Computes the string join along segments of a tensor.

  Given `segment_ids` with rank `N` and `inputs` with rank `N+M`:

  ```
  output[i, k1...kM] = strings.join([inputs[j1...jN, k1...kM]])
  ```

  where the join is over all `[j1...jN]` such that `segment_ids[j1...jN] = i`.

  Strings are joined in row-major order.

  For example:

  >>> inputs = ['this', 'a', 'test', 'is']
  >>> segment_ids = [0, 1, 1, 0]
  >>> num_segments = 2
  >>> separator = ' '
  >>> tf.strings.unsorted_segment_join(inputs, segment_ids, num_segments,
  ...                                  separator).numpy()
  array([b'this is', b'a test'], dtype=object)

  >>> inputs = [['Y', 'q', 'c'], ['Y', '6', '6'], ['p', 'G', 'a']]
  >>> segment_ids = [1, 0, 1]
  >>> num_segments = 2
  >>> tf.strings.unsorted_segment_join(inputs, segment_ids, num_segments,
  ...                                  separator=':').numpy()
  array([[b'Y', b'6', b'6'],
         [b'Y:p', b'q:G', b'c:a']], dtype=object)

  Args:
    inputs: A list of `tf.Tensor` objects of type `tf.string`.
    segment_ids: A tensor whose shape is a prefix of `inputs.shape` and whose
      type must be `tf.int32` or `tf.int64`. Negative segment ids are not
      supported.
    num_segments: A scalar of type `tf.int32` or `tf.int64`. Must be
      non-negative and larger than any segment id.
    separator: The separator to use when joining. Defaults to `""`.
    name: A name for the operation (optional).

  Returns:
    A `tf.string` tensor representing the concatenated values, using the given
    separator.
  """
  return gen_string_ops.unsorted_segment_join(
      inputs, segment_ids, num_segments, separator=separator, name=name)


# Register elementwise ops that don't have Python wrappers.
dispatch.register_unary_elementwise_api(gen_string_ops.as_string)
dispatch.register_unary_elementwise_api(gen_string_ops.decode_base64)
dispatch.register_unary_elementwise_api(gen_string_ops.encode_base64)
dispatch.register_unary_elementwise_api(gen_string_ops.string_lower)
dispatch.register_unary_elementwise_api(gen_string_ops.string_upper)
dispatch.register_unary_elementwise_api(gen_string_ops.unicode_transcode)
dispatch.register_unary_elementwise_api(gen_string_ops.string_strip)
dispatch.register_unary_elementwise_api(
    gen_string_ops.string_to_hash_bucket_fast)
dispatch.register_unary_elementwise_api(
    gen_string_ops.string_to_hash_bucket_strong)
dispatch.register_unary_elementwise_api(gen_string_ops.unicode_script)
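
# Note (an assumption about the dispatch mechanism, not stated in this file):
# registering an op as a unary elementwise API is what lets it dispatch on
# extension types such as tf.RaggedTensor, e.g.:
#   tf.strings.lower(tf.ragged.constant([["A", "B"], ["C"]]))
#   => <tf.RaggedTensor [[b'a', b'b'], [b'c']]>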