/src/mupdf/source/xps/xps-util.c

Source
// Copyright (C) 2004-2021 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

#include "mupdf/fitz.h"
#include "xps-imp.h"

/* Fold an ASCII upper-case letter to lower case; any other value is
 * returned untouched. The locale is never consulted. */
static inline int xps_tolower(int c)
{
  int is_upper = (c >= 'A' && c <= 'Z');

  /* 32 is the distance between 'A' and 'a' in ASCII. */
  return is_upper ? c + 32 : c;
}

/*
 * Compare two NUL-terminated strings, ignoring the case of ASCII letters.
 *
 * Returns 0 when the strings are equal after folding; otherwise returns the
 * difference between the folded characters at the first position where they
 * disagree (negative when 'a' sorts first, positive when 'b' does).
 */
int
xps_strcasecmp(char *a, char *b)
{
  for (;;)
  {
    int lhs = xps_tolower(*a);
    int rhs = xps_tolower(*b);

    if (lhs != rhs)
      return lhs - rhs;

    /* Both sides matched; if that match was the terminator we are done. */
    if (*a == 0)
      return 0;

    a++;
    b++;
  }
}

/* A URL is defined as consisting of a:
 * SCHEME (e.g. http:)
 * AUTHORITY (username, password, hostname, port, eg //test:passwd@mupdf.com:999)
 * PATH (e.g. /download)
 * QUERY (e.g. ?view=page)
 * FRAGMENT (e.g. #fred) (not strictly part of the URL)
 */
/*
 * Step over a leading URL scheme.
 *
 * A scheme is: alpha *(alpha | digit | "+" | "-" | ".") followed by ':'.
 * When 'path' begins with one, the returned pointer addresses the character
 * right after the ':'. In every other case 'path' itself is returned.
 */
static char *
skip_scheme(char *path)
{
  char *cursor;
  char ch = path[0];

  /* The first character has to be an ASCII letter. */
  if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')))
    return path;

  for (cursor = path + 1; *cursor != 0; cursor++)
  {
    ch = *cursor;

    /* The colon closes the scheme. */
    if (ch == ':')
      return cursor + 1;

    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))
      continue;
    if (ch == '+' || ch == '-' || ch == '.')
      continue;

    /* Anything else means this was never a scheme. */
    break;
  }

  /* Ran out of string, or hit an illegal character, before any ':'. */
  return path;
}

/*
 * Step over a leading URL authority section.
 *
 * An authority is introduced by "//" and runs up to, but not including, the
 * next '/' or '?', or up to the end of the string. When 'path' does not
 * begin with "//" it is returned unchanged.
 */
static char *
skip_authority(char *path)
{
  char *end;

  if (path[0] == '/' && path[1] == '/')
  {
    for (end = path + 2; *end != 0; end++)
    {
      if (*end == '/' || *end == '?')
        break;
    }
    return end;
  }

  return path;
}

/* True when x is a path separator ('/') or the string terminator (0).
 * The argument is expanded twice, so it must be free of side effects. */
#define SEP(x) ((x)=='/' || (x) == 0)

/*
 * Normalise the path portion of a URL in place and return 'name'.
 *
 * Any leading scheme and authority are left alone. In what follows them:
 *   - empty elements (repeated '/') and "." elements are dropped;
 *   - ".." removes the element written before it; at the top of a rooted
 *     path it is discarded, at the top of a relative path it is kept;
 *   - a non-empty path that collapses to nothing becomes ".".
 * The result is never longer than the input, so it is rewritten over the
 * same buffer.
 */
static char *
clean_path(char *name)
{
  char *src, *dst, *floor, *base;
  int rooted;

  base = skip_authority(skip_scheme(name));
  rooted = (base[0] == '/');

  /*
   * src:   start of the element being examined.
   * dst:   just past the last element written (no trailing slash).
   * floor: lowest position ".." is allowed to rewind dst to.
   */
  src = dst = floor = base + rooted;

  while (*src != 0)
  {
    /* Empty element: swallow the slash. */
    if (src[0] == '/')
    {
      src++;
      continue;
    }

    /* "." element: skip the dot only, the separator may be the NUL. */
    if (src[0] == '.' && SEP(src[1]))
    {
      src += 1;
      continue;
    }

    /* ".." element. */
    if (src[0] == '.' && src[1] == '.' && SEP(src[2]))
    {
      src += 2;
      if (dst > floor)
      {
        /* Rewind over the previously written element. */
        do
          dst--;
        while (dst > floor && *dst != '/');
      }
      else if (!rooted)
      {
        /* Relative path with nothing left to drop: keep the "..". */
        if (dst != base)
          *dst++ = '/';
        *dst++ = '.';
        *dst++ = '.';
        floor = dst;
      }
      /* Rooted path already at the root: "/.." is just "/". */
      continue;
    }

    /* Ordinary element: emit a separator if needed, then copy it down. */
    if (dst != base + rooted)
      *dst++ = '/';
    for (;;)
    {
      *dst = *src;
      if (*dst == '/' || *dst == 0)
        break;
      src++;
      dst++;
    }
  }

  /* Nothing was written. Store "." only when there is room for it: for
   * input such as "blah:" base and dst both sit on the terminator, and
   * writing there would run past the end of the string. */
  if (dst == base && *dst != 0)
    *dst++ = '.';
  *dst = '\0';

  return name;
}

/*
 * Resolve 'path' against 'base_uri' and store the cleaned result in 'output'.
 *
 * ctx, doc:     not used by this function.
 * output:       destination buffer of 'output_size' bytes.
 * base_uri:     URI that a relative 'path' is appended to.
 * path:         reference to resolve. It is taken as-is when it carries a
 *               scheme or an authority, or when it starts with '/'.
 * output_size:  capacity of 'output' in bytes. When it is zero or negative
 *               the function returns without touching 'output'.
 *
 * The result is truncated to fit 'output' and then normalised with
 * clean_path().
 */
void
xps_resolve_url(fz_context *ctx, xps_document *doc, char *output, char *base_uri, char *path, int output_size)
{
  char *p;

  /* With no room for even a terminator nothing valid can be stored, and
   * clean_path() must not be let loose on an unwritten buffer. */
  if (output_size <= 0)
    return;

  p = skip_authority(skip_scheme(path));

  if (p != path || path[0] == '/')
  {
    fz_strlcpy(output, path, output_size);
  }
  else
  {
    size_t len = fz_strlcpy(output, base_uri, output_size);

    /* fz_strlcpy follows strlcpy semantics: it reports the length of the
     * source string, which exceeds what was stored when the copy had to be
     * truncated. Clamp to the stored length so that output[len-1] stays
     * inside the buffer. */
    if (len >= (size_t)output_size)
      len = (size_t)output_size - 1;

    if (len == 0 || output[len-1] != '/')
      fz_strlcat(output, "/", output_size);
    fz_strlcat(output, path, output_size);
  }
  clean_path(output);
}

Line	Count	Source
1		// Copyright (C) 2004-2021 Artifex Software, Inc.
2		//
3		// This file is part of MuPDF.
4		//
5		// MuPDF is free software: you can redistribute it and/or modify it under the
6		// terms of the GNU Affero General Public License as published by the Free
7		// Software Foundation, either version 3 of the License, or (at your option)
8		// any later version.
9		//
10		// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11		// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12		// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13		// details.
14		//
15		// You should have received a copy of the GNU Affero General Public License
16		// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17		//
18		// Alternative licensing terms are available from the licensor.
19		// For commercial licensing, see <https://www.artifex.com/> or contact
20		// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21		// CA 94129, USA, for further information.
22
23		#include "mupdf/fitz.h"
24		#include "xps-imp.h"
25
26		static inline int xps_tolower(int c)
27	0	{
28	0	if (c >= 'A' && c <= 'Z')
29	0	return c + 32;
30	0	return c;
31	0	}
32
33		int
34		xps_strcasecmp(char a, char b)
35	0	{
36	0	while (xps_tolower(a) == xps_tolower(b))
37	0	{
38	0	if (*a++ == 0)
39	0	return 0;
40	0	b++;
41	0	}
42	0	return xps_tolower(a) - xps_tolower(b);
43	0	}
44
45		/* A URL is defined as consisting of a:
46		* SCHEME (e.g. http:)
47		* AUTHORITY (username, password, hostname, port, eg //test:passwd@mupdf.com:999)
48		* PATH (e.g. /download)
49		* QUERY (e.g. ?view=page)
50		* FRAGMENT (e.g. #fred) (not strictly part of the URL)
51		*/
52		static char *
53		skip_scheme(char *path)
54	0	{
55	0	char *p = path;
56
57		/* Skip over: alpha (alpha \| digit \| "+" \| "-" \| ".") looking for : /
58	0	if (p >= 'a' && p <= 'z')
59	0	{
60		/* Starts with a-z */
61	0	}
62	0	else if (p >= 'A' && p <= 'Z')
63	0	{
64		/* Starts with A-Z */
65	0	}
66	0	else
67	0	return path;
68
69	0	while (*++p)
70	0	{
71	0	if (p >= 'a' && p <= 'z')
72	0	continue;
73	0	if (p >= 'A' && p <= 'Z')
74	0	continue;
75	0	if (p >= '0' && p <= '9')
76	0	continue;
77	0	if (*p == '+')
78	0	continue;
79	0	if (*p == '-')
80	0	continue;
81	0	if (*p == '.')
82	0	continue;
83	0	if (*p == ':')
84	0	return p+1;
85	0	break;
86	0	}
87	0	return path;
88	0	}
89
90		static char *
91		skip_authority(char *path)
92	0	{
93	0	char *p = path;
94
95		/* Authority section must start with '//' */
96	0	if (p[0] != '/' \|\| p[1] != '/')
97	0	return path;
98	0	p += 2;
99
100		/* Authority is terminated by end of URL, '/' or '?' */
101	0	while (p && p != '/' && *p != '?')
102	0	p++;
103
104	0	return p;
105	0	}
106
107	0	#define SEP(x) ((x)=='/' \|\| (x) == 0)
108
109		static char *
110		clean_path(char *name)
111	0	{
112	0	char p, q, dotdot, start;
113	0	int rooted;
114
115	0	start = skip_scheme(name);
116	0	start = skip_authority(start);
117	0	rooted = start[0] == '/';
118
119		/*
120		* invariants:
121		* p points at beginning of path element we're considering.
122		* q points just past the last path element we wrote (no slash).
123		* dotdot points just past the point where .. cannot backtrack
124		* any further (no slash).
125		*/
126	0	p = q = dotdot = start + rooted;
127	0	while (*p)
128	0	{
129	0	if(p[0] == '/') /* null element */
130	0	p++;
131	0	else if (p[0] == '.' && SEP(p[1]))
132	0	p += 1; /* don't count the separator in case it is nul */
133	0	else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
134	0	{
135	0	p += 2;
136	0	if (q > dotdot) /* can backtrack */
137	0	{
138	0	while(--q > dotdot && *q != '/')
139	0	;
140	0	}
141	0	else if (!rooted) /* /.. is / but ./../ is .. */
142	0	{
143	0	if (q != start)
144	0	*q++ = '/';
145	0	*q++ = '.';
146	0	*q++ = '.';
147	0	dotdot = q;
148	0	}
149	0	}
150	0	else /* real path element */
151	0	{
152	0	if (q != start+rooted)
153	0	*q++ = '/';
154	0	while ((q = p) != '/' && *q != 0)
155	0	p++, q++;
156	0	}
157	0	}
158
159		/* Protect against 'blah:' input, where start = q = the terminator.
160		* We must not overrun it. */
161	0	if (q == start && q != 0) / empty string is really "." */
162	0	*q++ = '.';
163	0	*q = '\0';
164
165	0	return name;
166	0	}
167
168		void
169		xps_resolve_url(fz_context ctx, xps_document doc, char output, char base_uri, char *path, int output_size)
170	0	{
171	0	char *p = skip_authority(skip_scheme(path));
172
173	0	if (p != path \|\| path[0] == '/')
174	0	{
175	0	fz_strlcpy(output, path, output_size);
176	0	}
177	0	else
178	0	{
179	0	size_t len = fz_strlcpy(output, base_uri, output_size);
180	0	if (len == 0 \|\| output[len-1] != '/')
181	0	fz_strlcat(output, "/", output_size);
182	0	fz_strlcat(output, path, output_size);
183	0	}
184	0	clean_path(output);
185	0	}

Coverage Report

Created: 2026-03-31 07:17