Coverage Report

Created: 2026-01-31 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wget2/libwget/rss_url.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2013 Tim Ruehsen
3
 * Copyright (c) 2015-2024 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * Extracting URLs from RSS feeds (https://cyber.harvard.edu/rss/rss.html)
22
 *
23
 * Changelog
24
 * 21.12.2013  Tim Ruehsen  created
25
 *
26
 */
27
28
#include <config.h>
29
30
#include <unistd.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include <c-ctype.h>
34
35
#include <wget.h>
36
#include "private.h"
37
38
struct rss_context {
39
  wget_vector
40
    *urls;
41
};
42
43
static void rss_get_url(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos WGET_GCC_UNUSED)
44
98.0k
{
45
98.0k
  struct rss_context *ctx = context;
46
98.0k
  wget_string * url;
47
48
98.0k
  if (!val || !len)
49
78.4k
    return;
50
51
19.6k
  if ((flags & XML_FLG_ATTRIBUTE)) {
52
3.96k
    if (!wget_strcasecmp_ascii(attr, "url") || !wget_strcasecmp_ascii(attr, "href")
53
3.50k
      || !wget_strcasecmp_ascii(attr, "src") || !wget_strcasecmp_ascii(attr, "domain")
54
2.10k
      || !wget_strcasecmp_ascii(attr, "xmlns") || !wget_strncasecmp_ascii(attr, "xmlns:", 6))
55
2.43k
    {
56
2.82k
      for (;len && c_isspace(*val); val++, len--); // skip leading spaces
57
2.81k
      for (;len && c_isspace(val[len - 1]); len--);  // skip trailing spaces
58
59
2.43k
      if (!(url = wget_malloc(sizeof(wget_string))))
60
0
        return;
61
62
2.43k
      url->p = val;
63
2.43k
      url->len = len;
64
65
2.43k
      if (!ctx->urls)
66
122
        ctx->urls = wget_vector_create(32, NULL);
67
68
2.43k
      wget_vector_add(ctx->urls, url);
69
2.43k
    }
70
3.96k
  }
71
15.6k
  else if ((flags & XML_FLG_CONTENT)) {
72
15.1k
    const char *elem = strrchr(dir, '/');
73
74
15.1k
    if (elem) {
75
15.1k
      elem++;
76
77
15.1k
      if (!wget_strcasecmp_ascii(elem, "guid") || !wget_strcasecmp_ascii(elem, "link")
78
14.6k
         || !wget_strcasecmp_ascii(elem, "comments") || !wget_strcasecmp_ascii(elem, "docs"))
79
1.17k
      {
80
2.06k
        for (;len && c_isspace(*val); val++, len--); // skip leading spaces
81
1.93k
        for (;len && c_isspace(val[len - 1]); len--);  // skip trailing spaces
82
83
        // debug_printf("#2 %02X %s %s '%.*s' %zd\n", flags, dir, attr, (int) len, val, len);
84
85
1.17k
        if (!(url = wget_malloc(sizeof(wget_string))))
86
0
          return;
87
88
1.17k
        url->p = val;
89
1.17k
        url->len = len;
90
91
1.17k
        if (!ctx->urls)
92
86
          ctx->urls = wget_vector_create(32, NULL);
93
94
1.17k
        wget_vector_add(ctx->urls, url);
95
1.17k
      }
96
15.1k
    }
97
15.1k
  }
98
19.6k
}
99
100
/**
 * Collect URL candidates from an RSS document held in memory.
 *
 * \param[in] rss Buffer with the RSS (XML) text, handed to wget_xml_parse_buffer()
 * \param[out] urls Receives the vector of collected entries, or NULL if none was stored
 *
 * The entries are wget_string spans filled in by the parser callback.
 * NOTE(review): the spans presumably point into \p rss rather than being
 * copies, so the buffer would have to outlive the vector - confirm against
 * the XML parser.
 */
void wget_rss_get_urls_inline(const char *rss, wget_vector **urls)
{
  struct rss_context ctx;

  ctx.urls = NULL; // nothing collected yet

  wget_xml_parse_buffer(rss, rss_get_url, &ctx, XML_HINT_REMOVE_EMPTY_CONTENT);

  // Hand over whatever the callback gathered.
  *urls = ctx.urls;
}