/src/mosh/src/terminal/parser.cc
Line | Count | Source |
1 | | /* |
2 | | Mosh: the mobile shell |
3 | | Copyright 2012 Keith Winstein |
4 | | |
5 | | This program is free software: you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation, either version 3 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | | |
18 | | In addition, as a special exception, the copyright holders give |
19 | | permission to link the code of portions of this program with the |
20 | | OpenSSL library under certain conditions as described in each |
21 | | individual source file, and distribute linked combinations including |
22 | | the two. |
23 | | |
24 | | You must obey the GNU General Public License in all respects for all |
25 | | of the code used other than OpenSSL. If you modify file(s) with this |
26 | | exception, you may extend this exception to your version of the |
27 | | file(s), but you are not obligated to do so. If you do not wish to do |
28 | | so, delete this exception statement from your version. If you delete |
29 | | this exception statement from all source files in the program, then |
30 | | also delete it here. |
31 | | */ |
32 | | |
#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <typeinfo>
#include <utility>
38 | | |
39 | | #include "src/terminal/parser.h" |
40 | | |
/* Out-of-class definition of the static Parser::family member (its
   declaration is not in this file). Presumably this is the shared set of
   parser states that every Parser instance points into -- confirm against
   the header. */
const Parser::StateFamily Parser::family;
42 | | |
43 | | static void append_or_delete( Parser::ActionPointer act, Parser::Actions& vec ) |
44 | 11.3M | { |
45 | 11.3M | assert( act ); |
46 | | |
47 | 11.3M | if ( !act->ignore() ) { |
48 | 8.68M | vec.push_back( act ); |
49 | 8.68M | } |
50 | 11.3M | } |
51 | | |
52 | | void Parser::Parser::input( wchar_t ch, Actions& ret ) |
53 | 7.82M | { |
54 | 7.82M | Transition tx = state->input( ch ); |
55 | | |
56 | 7.82M | if ( tx.next_state != NULL ) { |
57 | 1.75M | append_or_delete( state->exit(), ret ); |
58 | 1.75M | } |
59 | | |
60 | 7.82M | append_or_delete( tx.action, ret ); |
61 | | |
62 | 7.82M | if ( tx.next_state != NULL ) { |
63 | 1.75M | append_or_delete( tx.next_state->enter(), ret ); |
64 | 1.75M | state = tx.next_state; |
65 | 1.75M | } |
66 | 7.82M | } |
67 | | |
68 | 1.31k | Parser::UTF8Parser::UTF8Parser() : parser(), buf_len( 0 ) |
69 | 1.31k | { |
70 | 1.31k | assert( BUF_SIZE >= (size_t)MB_CUR_MAX ); |
71 | 1.31k | buf[0] = '\0'; |
72 | 1.31k | } |
73 | | |
74 | | void Parser::UTF8Parser::input( char c, Actions& ret ) |
75 | 7.82M | { |
76 | 7.82M | assert( buf_len < BUF_SIZE ); |
77 | | |
78 | | /* 1-byte UTF-8 character, aka ASCII? Cheat. */ |
79 | 7.82M | if ( buf_len == 0 && static_cast<unsigned char>( c ) <= 0x7f ) { |
80 | 7.04M | parser.input( static_cast<wchar_t>( c ), ret ); |
81 | 7.04M | return; |
82 | 7.04M | } |
83 | | |
84 | 784k | buf[buf_len++] = c; |
85 | | |
86 | | /* This function will only work in a UTF-8 locale. */ |
87 | 784k | wchar_t pwc; |
88 | 784k | mbstate_t ps = mbstate_t(); |
89 | | |
90 | 784k | size_t total_bytes_parsed = 0; |
91 | 784k | size_t orig_buf_len = buf_len; |
92 | | |
93 | | /* this routine is somewhat complicated in order to comply with |
94 | | Unicode 6.0, section 3.9, "Best Practices for using U+FFFD" */ |
95 | | |
96 | 1.56M | while ( total_bytes_parsed != orig_buf_len ) { |
97 | 784k | assert( total_bytes_parsed < orig_buf_len ); |
98 | 784k | assert( buf_len > 0 ); |
99 | 784k | size_t bytes_parsed = mbrtowc( &pwc, buf, buf_len, &ps ); |
100 | | |
101 | | /* this returns 0 when n = 0! */ |
102 | | |
103 | 784k | if ( bytes_parsed == 0 ) { |
104 | | /* character was NUL, accept and clear buffer */ |
105 | 0 | assert( buf_len == 1 ); |
106 | 0 | buf_len = 0; |
107 | 0 | pwc = L'\0'; |
108 | 0 | bytes_parsed = 1; |
109 | 784k | } else if ( bytes_parsed == (size_t)-1 ) { |
110 | | /* invalid sequence, use replacement character and try again with last char */ |
111 | 784k | assert( errno == EILSEQ ); |
112 | 784k | if ( buf_len > 1 ) { |
113 | 0 | buf[0] = buf[buf_len - 1]; |
114 | 0 | bytes_parsed = buf_len - 1; |
115 | 0 | buf_len = 1; |
116 | 784k | } else { |
117 | 784k | buf_len = 0; |
118 | 784k | bytes_parsed = 1; |
119 | 784k | } |
120 | 784k | pwc = (wchar_t)0xFFFD; |
121 | 784k | } else if ( bytes_parsed == (size_t)-2 ) { |
122 | | /* can't parse incomplete multibyte character */ |
123 | 0 | total_bytes_parsed += buf_len; |
124 | 0 | continue; |
125 | 0 | } else { |
126 | | /* parsed into pwc, accept */ |
127 | 0 | assert( bytes_parsed <= buf_len ); |
128 | 0 | memmove( buf, buf + bytes_parsed, buf_len - bytes_parsed ); |
129 | 0 | buf_len = buf_len - bytes_parsed; |
130 | 0 | } |
131 | | |
132 | | /* Cast to unsigned for checks, because some |
133 | | platforms (e.g. ARM) use uint32_t as wchar_t, |
134 | | causing compiler warning on "pwc > 0" check. */ |
135 | 784k | const uint32_t pwcheck = pwc; |
136 | | |
137 | 784k | if ( pwcheck > 0x10FFFF ) { /* outside Unicode range */ |
138 | 0 | pwc = (wchar_t)0xFFFD; |
139 | 0 | } |
140 | | |
141 | 784k | if ( ( pwcheck >= 0xD800 ) && ( pwcheck <= 0xDFFF ) ) { /* surrogate code point */ |
142 | | /* |
143 | | OS X unfortunately allows these sequences without EILSEQ, but |
144 | | they are ill-formed UTF-8 and we shouldn't repeat them to the |
145 | | user's terminal. |
146 | | */ |
147 | 0 | pwc = (wchar_t)0xFFFD; |
148 | 0 | } |
149 | | |
150 | 784k | parser.input( pwc, ret ); |
151 | | |
152 | 784k | total_bytes_parsed += bytes_parsed; |
153 | 784k | } |
154 | 784k | } |
155 | | |
156 | 0 | Parser::Parser::Parser( const Parser& other ) : state( other.state ) {} |
157 | | |
158 | | Parser::Parser& Parser::Parser::operator=( const Parser& other ) |
159 | 0 | { |
160 | 0 | state = other.state; |
161 | 0 | return *this; |
162 | 0 | } |