/src/mozilla-central/netwerk/base/nsURLHelper.h

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsURLHelper_h__
#define nsURLHelper_h__

#include "nsString.h"

class nsIFile;
class nsIURLParser;

/**
 * Bit flags selecting optional behaviours of net_CoalesceDirs.
 */
enum netCoalesceFlags
{
  /** No optional behaviour selected (no flag bits set). */
  NET_COALESCE_NORMAL = 0x0,

  /**
   * Keep any /../ segments that would climb above the directory root.
   * Useful for FTP servers, where the root of the FTP URL is not
   * necessarily the root of the FTP filesystem.
   */
  NET_COALESCE_ALLOW_RELATIVE_ROOT = 0x1,

  /**
   * Treat /%2F and // as markers for the root directory and handle
   * them accordingly.
   */
  NET_COALESCE_DOUBLE_SLASH_IS_ROOT = 0x2
};

//----------------------------------------------------------------------------
// This module contains some private helper functions related to URL parsing.
//----------------------------------------------------------------------------

/* shutdown frees URL parser
 * net_ShutdownURLHelperOSX is declared only when XP_MACOSX is defined.
 * NOTE(review): what the OSX variant releases is not visible in this
 * header -- confirm against the implementation. */
void net_ShutdownURLHelper();
#ifdef XP_MACOSX
void net_ShutdownURLHelperOSX();
#endif

/* access URL parsers
 * Each accessor returns a raw nsIURLParser pointer.
 * NOTE(review): the names suggest parsers for URLs with an authority
 * component, without one, and the standard form respectively; ownership
 * and null-return behaviour are not visible here -- confirm. */
nsIURLParser * net_GetAuthURLParser();
nsIURLParser * net_GetNoAuthURLParser();
nsIURLParser * net_GetStdURLParser();

/* convert between nsIFile and file:// URL spec
 * net_GetURLSpecFromFile does an extra stat, so callers should
 * avoid it if possible in favor of net_GetURLSpecFromActualFile
 * and net_GetURLSpecFromDir
 *
 * NOTE(review): the unnamed nsACString & parameter of the three
 * net_GetURLSpecFrom* functions presumably receives the spec, and the
 * nsIFile ** parameter of net_GetFileFromURLSpec presumably receives the
 * resulting file -- confirm against the implementation. */
nsresult net_GetURLSpecFromFile(nsIFile *, nsACString &);
nsresult net_GetURLSpecFromDir(nsIFile *, nsACString &);
nsresult net_GetURLSpecFromActualFile(nsIFile *, nsACString &);
nsresult net_GetFileFromURLSpec(const nsACString &, nsIFile **);

/* extract file path components from file:// URL
 *
 * inURL is the input spec; outDirectory, outFileBaseName and
 * outFileExtension are the out-parameters for the three components.
 * NOTE(review): whether separators (trailing '/', leading '.') are kept
 * in the outputs is not visible here -- confirm. */
nsresult net_ParseFileURL(const nsACString &inURL,
                                      nsACString &outDirectory,
                                      nsACString &outFileBaseName,
                                      nsACString &outFileExtension);

/* handle .. in dirs while resolving URLs (path is UTF-8)
 *
 * flags takes netCoalesceFlags values.
 * NOTE(review): path is a non-const char* and nothing is returned, so the
 * path is presumably rewritten in place -- confirm. */
void net_CoalesceDirs(netCoalesceFlags flags, char* path);

/**
 * Resolves a relative path string containing "." and ".."
 * with respect to a base path (assumed to already be resolved).
 * For example, resolving "../../foo/./bar/../baz.html" w.r.t.
 * "/a/b/c/d/e/" yields "/a/b/c/foo/baz.html". Attempting to
 * ascend above the base results in the NS_ERROR_MALFORMED_URI
 * error. If basePath is null, it is treated as "/".
 * NOTE(review): basePath is a reference, so "null" presumably means an
 * empty string -- confirm.
 *
 * @param relativePath  a relative URI
 * @param basePath      a base URI
 * @param result        out-parameter; presumably receives the canonical
 *                      URI (the function itself returns an nsresult, not
 *                      a string) -- confirm
 *
 * @return an nsresult status code; NS_ERROR_MALFORMED_URI when the
 *         relative path ascends above the base, as described above
 */
nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                             const nsACString &basePath,
                                             nsACString &result);

/**
 * Checks whether a URL is absolute.
 *
 * @param inURL     URL spec
 * @return true if the given spec represents an absolute URL
 */
bool net_IsAbsoluteURL(const nsACString& inURL);

/**
 * Extracts the URI scheme, if possible.
 *
 * @param inURI     URI spec
 * @param scheme    scheme copied to this buffer on return. Is lowercase.
 * @return an nsresult status code. NOTE(review): the specific failure
 *         code used when no scheme can be extracted is not visible in
 *         this header -- confirm.
 */
nsresult net_ExtractURLScheme(const nsACString &inURI,
                              nsACString &scheme);

/* check that the given scheme conforms to RFC 2396
 *
 * scheme is passed together with an explicit length, schemeLen.
 * NOTE(review): presumably only the first schemeLen characters are
 * examined and no null terminator is required -- confirm. */
bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);

inline bool net_IsValidScheme(const nsCString& scheme)
{
    return net_IsValidScheme(scheme.get(), scheme.Length());
}

/**
 * This function strips out all C0 controls and space at the beginning and end
 * of the URL and filters out \r, \n, \t from the middle of the URL.  This makes
 * it safe to call on things like javascript: URLs or data: URLs, where we may
 * in fact run into whitespace that is not properly encoded.
 *
 * @param input the URL spec we want to filter
 * @param result the out param to write to if filtering happens
 */
void net_FilterURIString(const nsACString& input, nsACString& result);

/**
 * This function performs character stripping just like net_FilterURIString,
 * with the added benefit of also performing percent escaping of disallowed
 * characters, all in one pass. Saving one pass is very important when operating
 * on really large strings.
 *
 * @param aInput the URL spec we want to filter
 * @param aFlags the flags which control which characters we escape
 * @param aResult the out param to write to if filtering happens
 * @return an nsresult status code. NOTE(review): the failure conditions are
 *         not visible in this header -- confirm.
 */
nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult);

#if defined(XP_WIN)
/**
 * On Win32 and OS/2 systems, a back-slash in a file:// URL is equivalent to a
 * forward-slash.  This function maps any back-slashes to forward-slashes.
 * It is declared only when XP_WIN is defined.
 *
 * @param aURL
 *        The URL string to normalize (UTF-8 encoded).  This can be a
 *        relative URL segment.
 * @param aResultBuf
 *        The resulting string is appended to this string.  If the input URL
 *        is already normalized, then aResultBuf is unchanged.
 *
 * @returns false if aURL is already normalized.  Otherwise, returns true.
 */
bool net_NormalizeFileURL(const nsACString &aURL,
                                        nsCString &aResultBuf);
#endif

/*****************************************************************************
 * generic string routines follow (XXX move to someplace more generic).
 */

/* convert to lower case
 *
 * The first overload takes an explicit length; the second takes only the
 * string pointer.
 * NOTE(review): str is a non-const char* and nothing is returned, so the
 * conversion is presumably done in place, and the one-argument overload
 * presumably expects a null-terminated string -- confirm. */
void net_ToLowerCase(char* str, uint32_t length);
void net_ToLowerCase(char* str);

/**
 * Returns a pointer to the first character of |str| that is in the given
 * set.  If none is found, then |end| is returned.  Stops prematurely if a
 * null byte is encountered, and returns the address of the null byte.
 *
 * Note that the result is a non-const char* even though |str| is const.
 */
char * net_FindCharInSet(const char *str, const char *end, const char *set);

/**
 * Returns a pointer to the first character of |str| NOT in the given set.
 * If all characters are in the given set, then |end| is returned.  If '\0'
 * is not included in |set|, then stops prematurely if a null byte is
 * encountered, and returns the address of the null byte.
 *
 * Note that the result is a non-const char* even though |str| is const.
 */
char * net_FindCharNotInSet(const char *str, const char *end, const char *set);

/**
 * Returns a pointer to the last character of |str| NOT in the given set.
 * If all characters are in the given set, then |str - 1| is returned.
 *
 * Note that the result is a non-const char* even though |str| is const.
 */
char * net_RFindCharNotInSet(const char *str, const char *end, const char *set);

/**
 * Parses a content-type header and returns the content type and
 * charset (if any).  aContentCharset is not modified if no charset is
 * specified anywhere in aHeaderStr.  In that case (no charset
 * specified), aHadCharset is set to false.  Otherwise, it's set to
 * true.  Note that aContentCharset can be empty even if aHadCharset
 * is true.
 *
 * This parsing is suitable for HTTP requests.  Use net_ParseContentType
 * for parsing this header in HTTP responses.
 */
void net_ParseRequestContentType(const nsACString &aHeaderStr,
                                 nsACString       &aContentType,
                                 nsACString       &aContentCharset,
                                 bool*          aHadCharset);

/**
 * Parses a content-type header and returns the content type and
 * charset (if any).  aContentCharset is not modified if no charset is
 * specified anywhere in aHeaderStr.  In that case (no charset
 * specified), aHadCharset is set to false.  Otherwise, it's set to
 * true.  Note that aContentCharset can be empty even if aHadCharset
 * is true.
 */
void net_ParseContentType(const nsACString &aHeaderStr,
                          nsACString       &aContentType,
                          nsACString       &aContentCharset,
                          bool*          aHadCharset);
/**
 * Like the four-argument net_ParseContentType, but also returns the start
 * and end indexes for the charset parameter in aHeaderStr.  These are
 * indices for the entire parameter, NOT just the value.  If there is
 * "effectively" no charset parameter (e.g. if an earlier type with one is
 * overridden by a later type without one), *aHadCharset will be true but
 * *aCharsetStart will be set to -1.  Note that it's possible to have
 * aContentCharset empty and *aHadCharset true when *aCharsetStart is
 * nonnegative; this corresponds to charset="".
 */
void net_ParseContentType(const nsACString &aHeaderStr,
                          nsACString       &aContentType,
                          nsACString       &aContentCharset,
                          bool             *aHadCharset,
                          int32_t          *aCharsetStart,
                          int32_t          *aCharsetEnd);

/* inline versions */

/* remember the 64-bit platforms ;-)
 * UINTPTR_MAX cast to a char pointer.  The two-argument inline overloads of
 * net_FindCharInSet and net_FindCharNotInSet pass it as the |end| argument
 * of the three-argument versions. */
#define NET_MAX_ADDRESS ((char*)UINTPTR_MAX)

inline char *net_FindCharInSet(const char *str, const char *set)
{
    return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
}
inline char *net_FindCharNotInSet(const char *str, const char *set)
{
    return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
}
/* Two-argument form: uses the position of the null terminator of |str|
 * (str + strlen(str)) as the |end| argument of the three-argument version. */
inline char *net_RFindCharNotInSet(const char *str, const char *set)
{
    const char *terminator = str + strlen(str);
    return net_RFindCharNotInSet(str, terminator, set);
}

/**
 * This function returns true if the given hostname does not include any
 * restricted characters.  Otherwise, false is returned.
 *
 * @param host the hostname to check
 */
bool net_IsValidHostName(const nsACString& host);

/**
 * Checks whether the IPv4 address is valid according to RFC 3986 section 3.2.2.
 *
 * @param addr    the candidate address text
 * @param addrLen length passed alongside addr.  NOTE(review): presumably the
 *                number of characters to examine, so addr need not be
 *                null-terminated -- confirm.
 * @return true if the address is valid
 */
bool net_IsValidIPv4Addr(const char *addr, int32_t addrLen);

/**
 * Checks whether the IPv6 address is valid according to RFC 3986 section 3.2.2.
 *
 * @param addr    the candidate address text
 * @param addrLen length passed alongside addr.  NOTE(review): presumably the
 *                number of characters to examine, so addr need not be
 *                null-terminated -- confirm.
 * @return true if the address is valid
 */
bool net_IsValidIPv6Addr(const char *addr, int32_t addrLen);


/**
 * Returns the max length of a URL. The default is 1048576 (1 MB).
 * Can be changed by the pref "network.standard-url.max-length".
 *
 * @return the current maximum URL length
 */
int32_t net_GetURLMaxLength();

#endif // !nsURLHelper_h__

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2		/* This Source Code Form is subject to the terms of the Mozilla Public
3		* License, v. 2.0. If a copy of the MPL was not distributed with this
4		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6		#ifndef nsURLHelper_h__
7		#define nsURLHelper_h__
8
9		#include "nsString.h"
10
11		class nsIFile;
12		class nsIURLParser;
13
14		enum netCoalesceFlags
15		{
16		NET_COALESCE_NORMAL = 0,
17
18		/**
19		* retains /../ that reach above dir root (useful for FTP
20		* servers in which the root of the FTP URL is not necessarily
21		* the root of the FTP filesystem).
22		*/
23		NET_COALESCE_ALLOW_RELATIVE_ROOT = 1<<0,
24
25		/**
26		* recognizes /%2F and // as markers for the root directory
27		* and handles them properly.
28		*/
29		NET_COALESCE_DOUBLE_SLASH_IS_ROOT = 1<<1
30		};
31
32		//----------------------------------------------------------------------------
33		// This module contains some private helper functions related to URL parsing.
34		//----------------------------------------------------------------------------
35
36		/* shutdown frees URL parser */
37		void net_ShutdownURLHelper();
38		#ifdef XP_MACOSX
39		void net_ShutdownURLHelperOSX();
40		#endif
41
42		/* access URL parsers */
43		nsIURLParser * net_GetAuthURLParser();
44		nsIURLParser * net_GetNoAuthURLParser();
45		nsIURLParser * net_GetStdURLParser();
46
47		/* convert between nsIFile and file:// URL spec
48		* net_GetURLSpecFromFile does an extra stat, so callers should
49		* avoid it if possible in favor of net_GetURLSpecFromActualFile
50		* and net_GetURLSpecFromDir */
51		nsresult net_GetURLSpecFromFile(nsIFile *, nsACString &);
52		nsresult net_GetURLSpecFromDir(nsIFile *, nsACString &);
53		nsresult net_GetURLSpecFromActualFile(nsIFile *, nsACString &);
54		nsresult net_GetFileFromURLSpec(const nsACString &, nsIFile **);
55
56		/* extract file path components from file:// URL */
57		nsresult net_ParseFileURL(const nsACString &inURL,
58		nsACString &outDirectory,
59		nsACString &outFileBaseName,
60		nsACString &outFileExtension);
61
62		/* handle .. in dirs while resolving URLs (path is UTF-8) */
63		void net_CoalesceDirs(netCoalesceFlags flags, char* path);
64
65		/**
66		* Resolves a relative path string containing "." and ".."
67		* with respect to a base path (assumed to already be resolved).
68		* For example, resolving "../../foo/./bar/../baz.html" w.r.t.
69		* "/a/b/c/d/e/" yields "/a/b/c/foo/baz.html". Attempting to
70		* ascend above the base results in the NS_ERROR_MALFORMED_URI
71		* exception. If basePath is null, it treats it as "/".
72		*
73		* @param relativePath a relative URI
74		* @param basePath a base URI
75		*
76		* @return a new string, representing canonical uri
77		*/
78		nsresult net_ResolveRelativePath(const nsACString &relativePath,
79		const nsACString &basePath,
80		nsACString &result);
81
82		/**
83		* Check if a URL is absolute
84		*
85		* @param inURL URL spec
86		* @return true if the given spec represents an absolute URL
87		*/
88		bool net_IsAbsoluteURL(const nsACString& inURL);
89
90		/**
91		* Extract URI-Scheme if possible
92		*
93		* @param inURI URI spec
94		* @param scheme scheme copied to this buffer on return. Is lowercase.
95		*/
96		nsresult net_ExtractURLScheme(const nsACString &inURI,
97		nsACString &scheme);
98
99		/* check that the given scheme conforms to RFC 2396 */
100		bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
101
102		inline bool net_IsValidScheme(const nsCString& scheme)
103	0	{
104	0	return net_IsValidScheme(scheme.get(), scheme.Length());
105	0	}
106
107		/**
108		* This function strips out all C0 controls and space at the beginning and end
109		* of the URL and filters out \r, \n, \t from the middle of the URL. This makes
110		* it safe to call on things like javascript: urls or data: urls, where we may
111		* in fact run into whitespace that is not properly encoded.
112		*
113		* @param input the URL spec we want to filter
114		* @param result the out param to write to if filtering happens
115		*/
116		void net_FilterURIString(const nsACString& input, nsACString& result);
117
118		/**
119		* This function performs character stripping just like net_FilterURIString,
120		* with the added benefit of also performing percent escaping of dissallowed
121		* characters, all in one pass. Saving one pass is very important when operating
122		* on really large strings.
123		*
124		* @param aInput the URL spec we want to filter
125		* @param aFlags the flags which control which characters we escape
126		* @param aResult the out param to write to if filtering happens
127		*/
128		nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult);
129
130		#if defined(XP_WIN)
131		/**
132		* On Win32 and OS/2 system's a back-slash in a file:// URL is equivalent to a
133		* forward-slash. This function maps any back-slashes to forward-slashes.
134		*
135		* @param aURL
136		* The URL string to normalize (UTF-8 encoded). This can be a
137		* relative URL segment.
138		* @param aResultBuf
139		* The resulting string is appended to this string. If the input URL
140		* is already normalized, then aResultBuf is unchanged.
141		*
142		* @returns false if aURL is already normalized. Otherwise, returns true.
143		*/
144		bool net_NormalizeFileURL(const nsACString &aURL,
145		nsCString &aResultBuf);
146		#endif
147
148		/*****************************************************************************
149		* generic string routines follow (XXX move to someplace more generic).
150		*/
151
152		/* convert to lower case */
153		void net_ToLowerCase(char* str, uint32_t length);
154		void net_ToLowerCase(char* str);
155
156		/**
157		* returns pointer to first character of |str| in the given set. if not found,
158		* then |end| is returned. stops prematurely if a null byte is encountered,
159		* and returns the address of the null byte.
160		*/
161		char * net_FindCharInSet(const char *str, const char *end, const char *set);
162
163		/**
164		* returns pointer to first character of |str| NOT in the given set. if all
165		* characters are in the given set, then |end| is returned. if '\0' is not
166		* included in |set|, then stops prematurely if a null byte is encountered,
167		* and returns the address of the null byte.
168		*/
169		char * net_FindCharNotInSet(const char *str, const char *end, const char *set);
170
171		/**
172		* returns pointer to last character of |str| NOT in the given set. if all
173		* characters are in the given set, then |str - 1| is returned.
174		*/
175		char * net_RFindCharNotInSet(const char *str, const char *end, const char *set);
176
177		/**
178		* Parses a content-type header and returns the content type and
179		* charset (if any). aCharset is not modified if no charset is
180		* specified in anywhere in aHeaderStr. In that case (no charset
181		* specified), aHadCharset is set to false. Otherwise, it's set to
182		* true. Note that aContentCharset can be empty even if aHadCharset
183		* is true.
184		*
185		* This parsing is suitable for HTTP request. Use net_ParseContentType
186		* for parsing this header in HTTP responses.
187		*/
188		void net_ParseRequestContentType(const nsACString &aHeaderStr,
189		nsACString &aContentType,
190		nsACString &aContentCharset,
191		bool* aHadCharset);
192
193		/**
194		* Parses a content-type header and returns the content type and
195		* charset (if any). aCharset is not modified if no charset is
196		* specified in anywhere in aHeaderStr. In that case (no charset
197		* specified), aHadCharset is set to false. Otherwise, it's set to
198		* true. Note that aContentCharset can be empty even if aHadCharset
199		* is true.
200		*/
201		void net_ParseContentType(const nsACString &aHeaderStr,
202		nsACString &aContentType,
203		nsACString &aContentCharset,
204		bool* aHadCharset);
205		/**
206		* As above, but also returns the start and end indexes for the charset
207		* parameter in aHeaderStr. These are indices for the entire parameter, NOT
208		* just the value. If there is "effectively" no charset parameter (e.g. if an
209		* earlier type with one is overridden by a later type without one),
210		* *aHadCharset will be true but *aCharsetStart will be set to -1. Note that
211		* it's possible to have aContentCharset empty and *aHadCharset true when
212		* *aCharsetStart is nonnegative; this corresponds to charset="".
213		*/
214		void net_ParseContentType(const nsACString &aHeaderStr,
215		nsACString &aContentType,
216		nsACString &aContentCharset,
217		bool *aHadCharset,
218		int32_t *aCharsetStart,
219		int32_t *aCharsetEnd);
220
221		/* inline versions */
222
223		/* remember the 64-bit platforms ;-) */
224		#define NET_MAX_ADDRESS ((char*)UINTPTR_MAX)
225
226		inline char *net_FindCharInSet(const char *str, const char *set)
227	0	{
228	0	return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
229	0	}
230		inline char *net_FindCharNotInSet(const char *str, const char *set)
231	0	{
232	0	return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
233	0	}
234		inline char *net_RFindCharNotInSet(const char *str, const char *set)
235	0	{
236	0	return net_RFindCharNotInSet(str, str + strlen(str), set);
237	0	}
238
239		/**
240		* This function returns true if the given hostname does not include any
241		* restricted characters. Otherwise, false is returned.
242		*/
243		bool net_IsValidHostName(const nsACString& host);
244
245		/**
246		* Checks whether the IPv4 address is valid according to RFC 3986 section 3.2.2.
247		*/
248		bool net_IsValidIPv4Addr(const char *addr, int32_t addrLen);
249
250		/**
251		* Checks whether the IPv6 address is valid according to RFC 3986 section 3.2.2.
252		*/
253		bool net_IsValidIPv6Addr(const char *addr, int32_t addrLen);
254
255
256		/**
257		* Returns the max length of a URL. The default is 1048576 (1 MB).
258		* Can be changed by pref "network.standard-url.max-length"
259		*/
260		int32_t net_GetURLMaxLength();
261
262		#endif // !nsURLHelper_h__