Coverage Report

Created: 2023-11-19 06:49

/src/wget2/libwget/css.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2023 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * css parsing routines
22
 *
23
 * Changelog
24
 * 03.07.2012  Tim Ruehsen  created
25
 *
26
 * A parser using the flex tokenizer, created with flex tokens from
27
 *   https://www.w3.org/TR/css3-syntax/
28
 *
29
 * TODO:
30
 *  - since we are just interested in @import ... and url(...), we could use
31
 *    a simplistic hand-written parser which might be much smaller and faster
32
 */
33
34
#include <config.h>
35
36
#include <stddef.h>
37
#include <unistd.h>
38
#include <string.h>
39
#include <c-ctype.h>
40
#include <fcntl.h>
41
#include <sys/stat.h>
42
#ifdef HAVE_MMAP
43
#include <sys/mman.h>
44
#endif
45
46
#include <wget.h>
47
#include "private.h"
48
49
#include "css_tokenizer.h"
50
51
// Hand-written prototypes for the scanner functions this file calls; the
// scanner itself lives in css_tokenizer.c (see there for the definitions).
// yyalloc() and yyrealloc() are the exception: they are only declared here
// and defined further down in this file.
// NOTE(review): presumably duplicated here because no generated scanner
// header is included - confirm against the build setup.
52
typedef void* yyscan_t;
53
int yyget_leng(yyscan_t yyscanner);
54
char *yyget_text(yyscan_t yyscanner);
55
typedef struct yy_buffer_state *YY_BUFFER_STATE;
56
int yylex_init(yyscan_t* scanner);
57
YY_BUFFER_STATE yy_scan_string(const char * yystr, yyscan_t yyscanner);
58
YY_BUFFER_STATE yy_scan_bytes(const char * yystr, int len, yyscan_t yyscanner);
59
int yylex(yyscan_t yyscanner);
60
int yylex_destroy(yyscan_t yyscanner);
61
void *yyalloc(size_t size);
62
void *yyrealloc(void *p, size_t size);
63
64
13.2k
// Allocation hook declared alongside the scanner prototypes: hands the
// request to wget_malloc() and returns whatever that returns.
// NOTE(review): presumably called by the generated scanner - see css_tokenizer.c.
void *yyalloc(size_t size)
{
  void *mem = wget_malloc(size);

  return mem;
}
67
0
// Reallocation hook declared alongside the scanner prototypes: hands the
// request to wget_realloc() and returns whatever that returns.
// NOTE(review): presumably called by the generated scanner - see css_tokenizer.c.
void *yyrealloc(void *p, size_t size)
{
  void *mem = wget_realloc(p, size);

  return mem;
}
70
71
void wget_css_parse_buffer(
72
  const char *buf,
73
  size_t len,
74
  wget_css_parse_uri_callback *callback_uri,
75
  wget_css_parse_encoding_callback *callback_encoding,
76
  void *user_ctx)
77
3.30k
{
78
3.30k
  int token;
79
3.30k
  size_t length, pos = 0;
80
3.30k
  char *text;
81
3.30k
  yyscan_t scanner;
82
83
3.30k
  yylex_init(&scanner);
84
3.30k
  yy_scan_bytes(buf, (int) len, scanner);
85
86
75.9k
  while ((token = yylex(scanner)) != CSSEOF) {
87
72.6k
    if (token == IMPORT_SYM) {
88
      // e.g. @import "https://example.com/index.html"
89
2.85k
      pos += yyget_leng(scanner);
90
91
      // skip whitespace before URI/STRING
92
3.66k
      while ((token = yylex(scanner)) == S)
93
813
        pos += yyget_leng(scanner);
94
95
      // now token should be STRING or URI
96
2.85k
      if (token == STRING)
97
1.17k
        token = URI;
98
2.85k
    }
99
100
72.6k
    if (token == URI && callback_uri) {
101
      // e.g. url(https://example.com/index.html)
102
16.6k
      text = yyget_text(scanner);
103
16.6k
      length = yyget_leng(scanner);
104
105
16.6k
      if (*text == '\'' || *text == '\"') {
106
        // a string - remove the quotes
107
791
        callback_uri(user_ctx, text + 1, length - 2, pos + 1);
108
15.8k
      } else {
109
        // extract URI from url(...)
110
15.8k
        if (!wget_strncasecmp_ascii(text, "url(", 4)) {
111
14.6k
          char *otext = text;
112
113
          // remove trailing ) and any spaces before
114
15.9k
          for (length--; c_isspace(text[length - 1]); length--);
115
116
          // remove leading url( and any spaces after
117
15.4k
          for (length -= 4, text += 4; length && c_isspace(*text); text++, length--);
118
119
          // remove quotes
120
14.6k
          if (length && (*text == '\'' || *text == '\"')) {
121
834
            text++;
122
834
            length--;
123
834
          }
124
125
14.6k
          if (length && (text[length - 1] == '\'' || text[length - 1] == '\"'))
126
838
            length--;
127
128
14.6k
          callback_uri(user_ctx, text, length, pos + (text - otext));
129
14.6k
        }
130
15.8k
      }
131
55.9k
    } else if (token == CHARSET_SYM && callback_encoding) {
132
      // e.g. @charset "UTF-8"
133
1.31k
      pos += yyget_leng(scanner);
134
135
      // skip whitespace before charset name
136
1.71k
      while ((token = yylex(scanner)) == S)
137
401
        pos += yyget_leng(scanner);
138
139
      // now token should be STRING
140
1.31k
      if (token == STRING) {
141
789
        text = yyget_text(scanner);
142
789
        length = yyget_leng(scanner);
143
144
789
        if (*text == '\'' || *text == '\"') {
145
          // a string - remove the quotes
146
391
          callback_encoding(user_ctx, text + 1, length - 2);
147
398
        } else {
148
          // a string without quotes
149
398
          callback_encoding(user_ctx, text, length);
150
398
        }
151
789
      } else {
152
521
        error_printf(_("Unknown token after @charset: %d\n"), token);
153
521
      }
154
1.31k
    }
155
72.6k
    pos += yyget_leng(scanner);
156
72.6k
  }
157
158
3.30k
  yylex_destroy(scanner);
159
3.30k
}
160
161
void wget_css_parse_file(
162
  const char *fname,
163
  wget_css_parse_uri_callback *callback_uri,
164
  wget_css_parse_encoding_callback *callback_encoding,
165
  void *user_ctx)
166
0
{
167
0
  if (strcmp(fname,"-")) {
168
0
    int fd;
169
170
0
    if ((fd = open(fname, O_RDONLY|O_BINARY)) != -1) {
171
0
      struct stat st;
172
0
      if (fstat(fd, &st) == 0) {
173
0
#ifdef HAVE_MMAP
174
0
        size_t nread = st.st_size;
175
0
        char *buf = mmap(NULL, nread + 1, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
176
#else
177
        char *buf=wget_malloc(st.st_size+1);
178
        size_t nread=read(fd,buf,st.st_size);
179
#endif
180
181
0
        if (nread > 0) {
182
0
          buf[nread] = 0; // PROT_WRITE allows this write, MAP_PRIVATE prevents changes in underlying file system
183
0
          wget_css_parse_buffer(buf, st.st_size, callback_uri, callback_encoding, user_ctx);
184
0
        }
185
186
0
#ifdef HAVE_MMAP
187
0
        munmap(buf, nread);
188
#else
189
        xfree(buf);
190
#endif
191
0
      }
192
0
      close(fd);
193
0
    } else
194
0
      error_printf(_("Failed to open %s\n"), fname);
195
0
  } else {
196
    // read data from STDIN.
197
    // maybe should use yy_scan_bytes instead of buffering into memory.
198
0
    char tmp[4096];
199
0
    ssize_t nbytes;
200
0
    wget_buffer buf;
201
202
0
    wget_buffer_init(&buf, NULL, 4096);
203
204
0
    while ((nbytes = read(STDIN_FILENO, tmp, sizeof(tmp))) > 0) {
205
0
      wget_buffer_memcat(&buf, tmp, nbytes);
206
0
    }
207
208
0
    if (buf.length)
209
0
      wget_css_parse_buffer(buf.data, buf.length, callback_uri, callback_encoding, user_ctx);
210
211
0
    wget_buffer_deinit(&buf);
212
0
  }
213
0
}