Coverage Report

Created: 2024-07-06 06:44

/rust/registry/src/index.crates.io-6f17d22bba15001f/textwrap-0.11.0/src/splitting.rs
Line
Count
Source (jump to first uncovered line)
1
//! Word splitting functionality.
2
//!
3
//! To wrap text into lines, long words sometimes need to be split
4
//! across lines. The [`WordSplitter`] trait defines this
5
//! functionality. [`HyphenSplitter`] is the default implementation of
6
//! this trait: it will simply split words on existing hyphens.
7
8
#[cfg(feature = "hyphenation")]
9
use hyphenation::{Hyphenator, Standard};
10
11
/// An interface for splitting words.
12
///
13
/// When the [`wrap_iter`] method will try to fit text into a line, it
14
/// will eventually find a word that is too large for the current text
15
/// width. It will then call the currently configured `WordSplitter` to
16
/// have it attempt to split the word into smaller parts. This trait
17
/// describes that functionality via the [`split`] method.
18
///
19
/// If the `textwrap` crate has been compiled with the `hyphenation`
20
/// feature enabled, you will find an implementation of `WordSplitter`
21
/// by the `hyphenation::Standard` struct. Use this struct for
22
/// language-aware hyphenation. See the [`hyphenation` documentation]
23
/// for details.
24
///
25
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
26
/// [`split`]: #tymethod.split
27
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
28
pub trait WordSplitter {
29
    /// Return all possible splits of word. Each split is a triple
30
    /// with a head, a hyphen, and a tail where `head + &hyphen +
31
    /// &tail == word`. The hyphen can be empty if there is already a
32
    /// hyphen in the head.
33
    ///
34
    /// The splits should go from smallest to longest and should
35
    /// include no split at all. So the word "technology" could be
36
    /// split into
37
    ///
38
    /// ```no_run
39
    /// vec![("tech", "-", "nology"),
40
    ///      ("technol", "-", "ogy"),
41
    ///      ("technolo", "-", "gy"),
42
    ///      ("technology", "", "")];
43
    /// ```
44
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
45
}
46
47
/// Use this as a [`Wrapper.splitter`] to avoid any kind of
48
/// hyphenation:
49
///
50
/// ```
51
/// use textwrap::{Wrapper, NoHyphenation};
52
///
53
/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
54
/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
55
/// ```
56
///
57
/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
58
#[derive(Clone, Debug)]
59
pub struct NoHyphenation;
60
61
/// `NoHyphenation` implements `WordSplitter` by not splitting the
62
/// word at all.
63
impl WordSplitter for NoHyphenation {
64
0
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
65
0
        vec![(word, "", "")]
66
0
    }
67
}
68
69
/// Simple and default way to split words: splitting on existing
70
/// hyphens only.
71
///
72
/// You probably don't need to use this type since it's already used
73
/// by default by `Wrapper::new`.
74
#[derive(Clone, Debug)]
75
pub struct HyphenSplitter;
76
77
/// `HyphenSplitter` is the default `WordSplitter` used by
78
/// `Wrapper::new`. It will split words on any existing hyphens in the
79
/// word.
80
///
81
/// It will only use hyphens that are surrounded by alphanumeric
82
/// characters, which prevents a word like "--foo-bar" from being
83
/// split on the first or second hyphen.
84
impl WordSplitter for HyphenSplitter {
85
0
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
86
0
        let mut triples = Vec::new();
87
0
        // Split on hyphens, smallest split first. We only use hyphens
88
0
        // that are surrounded by alphanumeric characters. This is to
89
0
        // avoid splitting on repeated hyphens, such as those found in
90
0
        // --foo-bar.
91
0
        let mut char_indices = word.char_indices();
92
        // Early return if the word is empty.
93
0
        let mut prev = match char_indices.next() {
94
0
            None => return vec![(word, "", "")],
95
0
            Some((_, ch)) => ch,
96
        };
97
98
        // Find current char, or return early if the word only has a
99
        // single character.
100
0
        let (mut idx, mut cur) = match char_indices.next() {
101
0
            None => return vec![(word, "", "")],
102
0
            Some((idx, cur)) => (idx, cur),
103
        };
104
105
0
        for (i, next) in char_indices {
106
0
            if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
107
0
                let (head, tail) = word.split_at(idx + 1);
108
0
                triples.push((head, "", tail));
109
0
            }
110
0
            prev = cur;
111
0
            idx = i;
112
0
            cur = next;
113
        }
114
115
        // Final option is no split at all.
116
0
        triples.push((word, "", ""));
117
0
118
0
        triples
119
0
    }
120
}
121
122
/// A hyphenation dictionary can be used to do language-specific
123
/// hyphenation using patterns from the hyphenation crate.
124
#[cfg(feature = "hyphenation")]
125
impl WordSplitter for Standard {
126
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
127
        // Find splits based on language dictionary.
128
        let mut triples = Vec::new();
129
        for n in self.hyphenate(word).breaks {
130
            let (head, tail) = word.split_at(n);
131
            let hyphen = if head.ends_with('-') { "" } else { "-" };
132
            triples.push((head, hyphen, tail));
133
        }
134
        // Final option is no split at all.
135
        triples.push((word, "", ""));
136
137
        triples
138
    }
139
}