Coverage Report

Created: 2026-06-15 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mosh/src/terminal/parser.cc
Line
Count
Source
1
/*
2
    Mosh: the mobile shell
3
    Copyright 2012 Keith Winstein
4
5
    This program is free software: you can redistribute it and/or modify
6
    it under the terms of the GNU General Public License as published by
7
    the Free Software Foundation, either version 3 of the License, or
8
    (at your option) any later version.
9
10
    This program is distributed in the hope that it will be useful,
11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
    GNU General Public License for more details.
14
15
    You should have received a copy of the GNU General Public License
16
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17
18
    In addition, as a special exception, the copyright holders give
19
    permission to link the code of portions of this program with the
20
    OpenSSL library under certain conditions as described in each
21
    individual source file, and distribute linked combinations including
22
    the two.
23
24
    You must obey the GNU General Public License in all respects for all
25
    of the code used other than OpenSSL. If you modify file(s) with this
26
    exception, you may extend this exception to your version of the
27
    file(s), but you are not obligated to do so. If you do not wish to do
28
    so, delete this exception statement from your version. If you delete
29
    this exception statement from all source files in the program, then
30
    also delete it here.
31
*/
32
33
#include <cassert>
34
#include <cerrno>
35
#include <cstdint>
36
#include <cwchar>
37
#include <typeinfo>
38
39
#include "src/terminal/parser.h"
40
41
/* Definition of the constant Parser::family object (type StateFamily).
   Its declaration is not in this file -- presumably it comes from
   src/terminal/parser.h; confirm there before changing linkage. */
const Parser::StateFamily Parser::family;
42
43
/* Append act to vec unless the action asks to be ignored.
   act must be non-null; this is enforced with an assert. */
static void append_or_delete( Parser::ActionPointer act, Parser::Actions& vec )
{
  assert( act );

  if ( act->ignore() ) {
    /* Ignored actions are simply not recorded. */
    return;
  }

  vec.push_back( act );
}
51
52
void Parser::Parser::input( wchar_t ch, Actions& ret )
53
7.82M
{
54
7.82M
  Transition tx = state->input( ch );
55
56
7.82M
  if ( tx.next_state != NULL ) {
57
1.75M
    append_or_delete( state->exit(), ret );
58
1.75M
  }
59
60
7.82M
  append_or_delete( tx.action, ret );
61
62
7.82M
  if ( tx.next_state != NULL ) {
63
1.75M
    append_or_delete( tx.next_state->enter(), ret );
64
1.75M
    state = tx.next_state;
65
1.75M
  }
66
7.82M
}
67
68
1.31k
/* Start with a default-constructed parser and an empty byte buffer.
   The buffer has to be able to hold the longest multibyte character
   of the current locale, which is checked with an assert. */
Parser::UTF8Parser::UTF8Parser() : parser(), buf_len( 0 )
{
  assert( static_cast<size_t>( MB_CUR_MAX ) <= BUF_SIZE );

  buf[0] = '\0';
}
73
74
void Parser::UTF8Parser::input( char c, Actions& ret )
75
7.82M
{
76
7.82M
  assert( buf_len < BUF_SIZE );
77
78
  /* 1-byte UTF-8 character, aka ASCII?  Cheat. */
79
7.82M
  if ( buf_len == 0 && static_cast<unsigned char>( c ) <= 0x7f ) {
80
7.04M
    parser.input( static_cast<wchar_t>( c ), ret );
81
7.04M
    return;
82
7.04M
  }
83
84
784k
  buf[buf_len++] = c;
85
86
  /* This function will only work in a UTF-8 locale. */
87
784k
  wchar_t pwc;
88
784k
  mbstate_t ps = mbstate_t();
89
90
784k
  size_t total_bytes_parsed = 0;
91
784k
  size_t orig_buf_len = buf_len;
92
93
  /* this routine is somewhat complicated in order to comply with
94
     Unicode 6.0, section 3.9, "Best Practices for using U+FFFD" */
95
96
1.56M
  while ( total_bytes_parsed != orig_buf_len ) {
97
784k
    assert( total_bytes_parsed < orig_buf_len );
98
784k
    assert( buf_len > 0 );
99
784k
    size_t bytes_parsed = mbrtowc( &pwc, buf, buf_len, &ps );
100
101
    /* this returns 0 when n = 0! */
102
103
784k
    if ( bytes_parsed == 0 ) {
104
      /* character was NUL, accept and clear buffer */
105
0
      assert( buf_len == 1 );
106
0
      buf_len = 0;
107
0
      pwc = L'\0';
108
0
      bytes_parsed = 1;
109
784k
    } else if ( bytes_parsed == (size_t)-1 ) {
110
      /* invalid sequence, use replacement character and try again with last char */
111
784k
      assert( errno == EILSEQ );
112
784k
      if ( buf_len > 1 ) {
113
0
        buf[0] = buf[buf_len - 1];
114
0
        bytes_parsed = buf_len - 1;
115
0
        buf_len = 1;
116
784k
      } else {
117
784k
        buf_len = 0;
118
784k
        bytes_parsed = 1;
119
784k
      }
120
784k
      pwc = (wchar_t)0xFFFD;
121
784k
    } else if ( bytes_parsed == (size_t)-2 ) {
122
      /* can't parse incomplete multibyte character */
123
0
      total_bytes_parsed += buf_len;
124
0
      continue;
125
0
    } else {
126
      /* parsed into pwc, accept */
127
0
      assert( bytes_parsed <= buf_len );
128
0
      memmove( buf, buf + bytes_parsed, buf_len - bytes_parsed );
129
0
      buf_len = buf_len - bytes_parsed;
130
0
    }
131
132
    /* Cast to unsigned for checks, because some
133
       platforms (e.g. ARM) use uint32_t as wchar_t,
134
       causing compiler warning on "pwc > 0" check. */
135
784k
    const uint32_t pwcheck = pwc;
136
137
784k
    if ( pwcheck > 0x10FFFF ) { /* outside Unicode range */
138
0
      pwc = (wchar_t)0xFFFD;
139
0
    }
140
141
784k
    if ( ( pwcheck >= 0xD800 ) && ( pwcheck <= 0xDFFF ) ) { /* surrogate code point */
142
      /*
143
        OS X unfortunately allows these sequences without EILSEQ, but
144
        they are ill-formed UTF-8 and we shouldn't repeat them to the
145
        user's terminal.
146
      */
147
0
      pwc = (wchar_t)0xFFFD;
148
0
    }
149
150
784k
    parser.input( pwc, ret );
151
152
784k
    total_bytes_parsed += bytes_parsed;
153
784k
  }
154
784k
}
155
156
0
/* Copy constructor: the new parser starts in the same state as other. */
Parser::Parser::Parser( const Parser& other )
  : state( other.state )
{
}
157
158
/* Copy assignment: adopt other's current state and return this parser. */
Parser::Parser& Parser::Parser::operator=( const Parser& other )
{
  this->state = other.state;

  return *this;
}