/src/wget2/libwget/css_url.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2013 Tim Ruehsen |
3 | | * Copyright (c) 2015-2024 Free Software Foundation, Inc. |
4 | | * |
5 | | * This file is part of libwget. |
6 | | * |
7 | | * Libwget is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as published by |
9 | | * the Free Software Foundation, either version 3 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * Libwget is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libwget. If not, see <https://www.gnu.org/licenses/>. |
19 | | * |
20 | | * |
21 | | * Higher level CSS parsing routines |
22 | | * |
23 | | * Changelog |
24 | | * 15.01.2013 Tim Ruehsen created |
25 | | * |
26 | | */ |
27 | | |
28 | | #include <config.h> |
29 | | |
30 | | #include <unistd.h> |
31 | | #include <stdlib.h> |
32 | | #include <string.h> |
33 | | #include <assert.h> |
34 | | |
35 | | #include <wget.h> |
36 | | #include "private.h" |
37 | | |
38 | | typedef struct { |
39 | | const char |
40 | | **encoding; |
41 | | wget_vector |
42 | | *uris; |
43 | | } css_context; |
44 | | |
45 | | static void url_free(void *url) |
46 | 15.3k | { |
47 | 15.3k | wget_css_parsed_url *u = url; |
48 | | |
49 | 15.3k | xfree(u->url); |
50 | 15.3k | xfree(u->abs_url); |
51 | 15.3k | xfree(u); |
52 | 15.3k | } |
53 | | |
54 | | // Callback function, called from CSS parser for each @charset found. |
55 | | static void get_encoding(void *context, const char *encoding, size_t len) |
56 | 885 | { |
57 | 885 | css_context *ctx = context; |
58 | | |
59 | | // take only the first @charset rule |
60 | 885 | if (!*ctx->encoding) { |
61 | 32 | *ctx->encoding = wget_strmemdup(encoding, len); |
62 | 32 | debug_printf("URI content encoding = '%s'\n", *ctx->encoding); |
63 | 32 | } |
64 | 885 | } |
65 | | |
66 | | // Callback function, called from CSS parser for each URI found. |
67 | | static void get_url(void *context, const char *url, size_t len, size_t pos) |
68 | 15.3k | { |
69 | 15.3k | css_context *ctx = context; |
70 | 15.3k | wget_css_parsed_url *parsed_url; |
71 | | |
72 | 15.3k | if (!(parsed_url = wget_calloc(1, sizeof(wget_css_parsed_url)))) |
73 | 0 | return; |
74 | | |
75 | 15.3k | if (!(parsed_url->url = wget_strmemdup(url, len))) { |
76 | 0 | xfree(parsed_url); |
77 | 0 | return; |
78 | 0 | } |
79 | | |
80 | 15.3k | parsed_url->len = len; |
81 | 15.3k | parsed_url->pos = pos; |
82 | | |
83 | 15.3k | if (!ctx->uris) { |
84 | 551 | ctx->uris = wget_vector_create(16, NULL); |
85 | 551 | wget_vector_set_destructor(ctx->uris, url_free); |
86 | 551 | } |
87 | | |
88 | 15.3k | wget_vector_add(ctx->uris, parsed_url); |
89 | 15.3k | } |
90 | | |
91 | | static void urls_to_absolute(wget_vector *urls, wget_iri *base) |
92 | 1.25k | { |
93 | 1.25k | if (base && urls) { |
94 | 551 | wget_buffer buf; |
95 | 551 | wget_buffer_init(&buf, NULL, 1024); |
96 | | |
97 | 15.8k | for (int it = 0; it < wget_vector_size(urls); it++) { |
98 | 15.3k | wget_css_parsed_url *url = wget_vector_get(urls, it); |
99 | 15.3k | assert(url != NULL); |
100 | | |
101 | 15.3k | if (wget_iri_relative_to_abs(base, url->url, url->len, &buf)) |
102 | 15.3k | url->abs_url = wget_strmemdup(buf.data, buf.length); |
103 | 0 | else |
104 | 0 | error_printf(_("Cannot resolve relative URI '%s'\n"), url->url); |
105 | 15.3k | } |
106 | | |
107 | 551 | wget_buffer_deinit(&buf); |
108 | 551 | } |
109 | 1.25k | } |
110 | | |
111 | | wget_vector *wget_css_get_urls(const char *css, size_t len, wget_iri *base, const char **encoding) |
112 | 1.25k | { |
113 | 1.25k | css_context context = { .encoding = encoding }; |
114 | | |
115 | 1.25k | wget_css_parse_buffer(css, len, get_url, encoding ? get_encoding : NULL, &context); |
116 | 1.25k | urls_to_absolute(context.uris, base); |
117 | | |
118 | 1.25k | return context.uris; |
119 | 1.25k | } |
120 | | |
121 | | wget_vector *wget_css_get_urls_from_localfile(const char *fname, wget_iri *base, const char **encoding) |
122 | 0 | { |
123 | 0 | css_context context = { .encoding = encoding }; |
124 | |
|
125 | 0 | wget_css_parse_file(fname, get_url, encoding ? get_encoding : NULL, &context); |
126 | 0 | urls_to_absolute(context.uris, base); |
127 | |
|
128 | 0 | return context.uris; |
129 | 0 | } |