/rust/registry/src/index.crates.io-6f17d22bba15001f/textwrap-0.11.0/src/splitting.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Word splitting functionality. |
2 | | //! |
3 | | //! To wrap text into lines, long words sometimes need to be split |
4 | | //! across lines. The [`WordSplitter`] trait defines this |
5 | | //! functionality. [`HyphenSplitter`] is the default implementation of |
6 | | //! this trait: it will simply split words on existing hyphens. |
7 | | |
8 | | #[cfg(feature = "hyphenation")] |
9 | | use hyphenation::{Hyphenator, Standard}; |
10 | | |
/// An interface for splitting words.
///
/// When the [`wrap_iter`] method tries to fit text into a line, it
/// will eventually find a word that is too large for the current text
/// width. It will then call the currently configured `WordSplitter` to
/// have it attempt to split the word into smaller parts. This trait
/// describes that functionality via the [`split`] method.
///
/// If the `textwrap` crate has been compiled with the `hyphenation`
/// feature enabled, you will find an implementation of `WordSplitter`
/// for the `hyphenation::Standard` struct. Use this struct for
/// language-aware hyphenation. See the [`hyphenation` documentation]
/// for details.
///
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
    /// Return all possible splits of `word`. Each split is a triple
    /// with a head, a hyphen, and a tail where `head + tail == word`.
    /// The hyphen is the text to show at the break; it can be empty
    /// if there is already a hyphen at the end of the head.
    ///
    /// The splits should go from smallest to longest and should
    /// include no split at all (the whole word as head, with an empty
    /// hyphen and an empty tail). So the word "technology" could be
    /// split into
    ///
    /// ```no_run
    /// vec![("tech", "-", "nology"),
    ///      ("technol", "-", "ogy"),
    ///      ("technolo", "-", "gy"),
    ///      ("technology", "", "")];
    /// ```
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}
46 | | |
47 | | /// Use this as a [`Wrapper.splitter`] to avoid any kind of |
48 | | /// hyphenation: |
49 | | /// |
50 | | /// ``` |
51 | | /// use textwrap::{Wrapper, NoHyphenation}; |
52 | | /// |
53 | | /// let wrapper = Wrapper::with_splitter(8, NoHyphenation); |
54 | | /// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); |
55 | | /// ``` |
56 | | /// |
57 | | /// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter |
58 | | #[derive(Clone, Debug)] |
59 | | pub struct NoHyphenation; |
60 | | |
61 | | /// `NoHyphenation` implements `WordSplitter` by not splitting the |
62 | | /// word at all. |
63 | | impl WordSplitter for NoHyphenation { |
64 | 0 | fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { |
65 | 0 | vec![(word, "", "")] |
66 | 0 | } |
67 | | } |
68 | | |
69 | | /// Simple and default way to split words: splitting on existing |
70 | | /// hyphens only. |
71 | | /// |
72 | | /// You probably don't need to use this type since it's already used |
73 | | /// by default by `Wrapper::new`. |
74 | | #[derive(Clone, Debug)] |
75 | | pub struct HyphenSplitter; |
76 | | |
77 | | /// `HyphenSplitter` is the default `WordSplitter` used by |
78 | | /// `Wrapper::new`. It will split words on any existing hyphens in the |
79 | | /// word. |
80 | | /// |
81 | | /// It will only use hyphens that are surrounded by alphanumeric |
82 | | /// characters, which prevents a word like "--foo-bar" from being |
83 | | /// split on the first or second hyphen. |
84 | | impl WordSplitter for HyphenSplitter { |
85 | 0 | fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { |
86 | 0 | let mut triples = Vec::new(); |
87 | 0 | // Split on hyphens, smallest split first. We only use hyphens |
88 | 0 | // that are surrounded by alphanumeric characters. This is to |
89 | 0 | // avoid splitting on repeated hyphens, such as those found in |
90 | 0 | // --foo-bar. |
91 | 0 | let mut char_indices = word.char_indices(); |
92 | | // Early return if the word is empty. |
93 | 0 | let mut prev = match char_indices.next() { |
94 | 0 | None => return vec![(word, "", "")], |
95 | 0 | Some((_, ch)) => ch, |
96 | | }; |
97 | | |
98 | | // Find current word, or return early if the word only has a |
99 | | // single character. |
100 | 0 | let (mut idx, mut cur) = match char_indices.next() { |
101 | 0 | None => return vec![(word, "", "")], |
102 | 0 | Some((idx, cur)) => (idx, cur), |
103 | | }; |
104 | | |
105 | 0 | for (i, next) in char_indices { |
106 | 0 | if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() { |
107 | 0 | let (head, tail) = word.split_at(idx + 1); |
108 | 0 | triples.push((head, "", tail)); |
109 | 0 | } |
110 | 0 | prev = cur; |
111 | 0 | idx = i; |
112 | 0 | cur = next; |
113 | | } |
114 | | |
115 | | // Finally option is no split at all. |
116 | 0 | triples.push((word, "", "")); |
117 | 0 |
|
118 | 0 | triples |
119 | 0 | } |
120 | | } |
121 | | |
/// With the `hyphenation` feature enabled, a `hyphenation::Standard`
/// dictionary can serve as a `WordSplitter`, giving language-specific
/// hyphenation based on the patterns from the hyphenation crate.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Standard {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Turn every break reported by the dictionary into a
        // (head, hyphen, tail) candidate, in the order reported.
        let mut splits: Vec<(&'w str, &'w str, &'w str)> = self
            .hyphenate(word)
            .breaks
            .into_iter()
            .map(|at| {
                let (head, tail) = word.split_at(at);
                // Do not add a second hyphen when the head already
                // ends in one.
                let hyphen = if head.ends_with('-') { "" } else { "-" };
                (head, hyphen, tail)
            })
            .collect();

        // The last candidate is always the word left in one piece.
        splits.push((word, "", ""));
        splits
    }
}