Line data Source code
1 : #include "fd_url.h"
2 : #include "../../util/cstr/fd_cstr.h"
3 : #include "../../util/log/fd_log.h"
4 :
5 : fd_url_t *
6 : fd_url_parse_cstr( fd_url_t * const url,
7 : char const * const url_str,
8 : ulong const url_str_len,
9 0 : int * opt_err ) {
10 0 : int err_[1];
11 0 : if( !opt_err ) opt_err = err_;
12 0 : *opt_err = FD_URL_SUCCESS;
13 :
14 0 : char const * const url_end = url_str+url_str_len;
15 :
16 0 : char const * const scheme = url_str;
17 0 : ulong scheme_len = 0UL;
18 0 : if( FD_UNLIKELY( url_str_len<8UL ) ) return NULL;
19 0 : if( fd_memeq( scheme, "http://", 7 ) ) {
20 0 : scheme_len = 7;
21 0 : } else if( fd_memeq( scheme, "https://", 8 ) ) {
22 0 : scheme_len = 8;
23 0 : } else {
24 0 : *opt_err = FD_URL_ERR_SCHEME;
25 0 : return NULL;
26 0 : }
27 :
28 0 : char const * const authority = scheme+scheme_len;
29 :
30 : /* Find beginning of path */
31 0 : char const * authority_end;
32 0 : for( authority_end = authority;
33 0 : authority_end < url_end && *authority_end!='/';
34 0 : authority_end++ ) {
35 0 : if( FD_UNLIKELY( *authority_end=='@' ) ) {
36 0 : *opt_err = FD_URL_ERR_USERINFO;
37 0 : return NULL; /* userinfo not supported */
38 0 : }
39 0 : }
40 0 : ulong const authority_len = (ulong)( authority_end-authority );
41 :
42 : /* Find port number */
43 0 : char const * const host = authority;
44 0 : ulong host_len = authority_len;
45 0 : char const * port = NULL;
46 0 : ulong port_len = 0UL;
47 0 : for( ulong j=0UL; j<authority_len; j++ ) {
48 0 : if( authority[ j ]==':' ) {
49 0 : host_len = j;
50 0 : port = authority +j+1;
51 0 : port_len = authority_len-j-1;
52 0 : break;
53 0 : }
54 0 : }
55 :
56 0 : if( FD_UNLIKELY( host_len>255 ) ) {
57 0 : *opt_err = FD_URL_ERR_HOST_OVERSZ;
58 0 : return NULL;
59 0 : }
60 :
61 :
62 0 : *url = (fd_url_t){
63 0 : .scheme = scheme,
64 0 : .scheme_len = scheme_len,
65 0 : .host = host,
66 0 : .host_len = host_len,
67 0 : .port = port,
68 0 : .port_len = port_len,
69 0 : .tail = authority+authority_len,
70 0 : .tail_len = (ulong)( url_end-(authority+authority_len) )
71 0 : };
72 :
73 0 : return url;
74 0 : }
75 :
76 : int
77 : fd_url_parse_endpoint( fd_url_t * url_,
78 : char const * url_str,
79 : ulong url_str_len,
80 : ushort * tcp_port,
81 : _Bool * is_ssl,
82 0 : char const * context ) {
83 0 : char const * ctx = context ? context : "URL";
84 :
85 0 : int url_err[1];
86 0 : fd_url_t * url = fd_url_parse_cstr( url_, url_str, url_str_len, url_err );
87 0 : if( FD_UNLIKELY( !url ) ) {
88 0 : switch( *url_err ) {
89 0 : case FD_URL_ERR_SCHEME:
90 0 : FD_LOG_WARNING(( "Invalid %s `%.*s`: must start with `http://` or `https://`", ctx, (int)url_str_len, url_str ));
91 0 : return -1;
92 0 : case FD_URL_ERR_HOST_OVERSZ:
93 0 : FD_LOG_WARNING(( "Invalid %s `%.*s`: domain name is too long", ctx, (int)url_str_len, url_str ));
94 0 : return -1;
95 0 : case FD_URL_ERR_USERINFO:
96 0 : FD_LOG_WARNING(( "Invalid %s `%.*s`: userinfo is not supported", ctx, (int)url_str_len, url_str ));
97 0 : return -1;
98 0 : default:
99 0 : FD_LOG_WARNING(( "Invalid %s `%.*s`", ctx, (int)url_str_len, url_str ));
100 0 : return -1;
101 0 : }
102 0 : }
103 :
104 : /* fd_url_parse_cstr() already guarantees http:// or https:// */
105 0 : *is_ssl = ( url->scheme_len==8UL );
106 :
107 0 : *tcp_port = *is_ssl ? 443 : 80;
108 0 : if( url->port_len ) {
109 0 : if( FD_UNLIKELY( url->port_len > 5 ) ) {
110 0 : invalid_port:
111 0 : FD_LOG_WARNING(( "Invalid %s `%.*s`: invalid port number", ctx, (int)url_str_len, url_str ));
112 0 : return -1;
113 0 : }
114 :
115 0 : char port_cstr[6];
116 0 : fd_cstr_fini( fd_cstr_append_text( fd_cstr_init( port_cstr ), url->port, url->port_len ) );
117 0 : ulong port_no = fd_cstr_to_ulong( port_cstr );
118 0 : if( FD_UNLIKELY( !port_no || port_no>USHORT_MAX ) ) goto invalid_port;
119 :
120 0 : *tcp_port = (ushort)port_no;
121 0 : }
122 :
123 0 : return 0;
124 0 : }
125 :
126 :
/* fd_hex_unhex maps one ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F')
   to its value in [0,15].  Any other input yields -1. */

static inline int
fd_hex_unhex( int c ) {
  int const is_digit = ( '0'<=c ) & ( c<='9' );
  int const is_lower = ( 'a'<=c ) & ( c<='f' );
  int const is_upper = ( 'A'<=c ) & ( c<='F' );

  if( is_digit ) return c-'0';
  if( is_lower ) return 0xa+( c-'a' );
  if( is_upper ) return 0xa+( c-'A' );
  return -1;
}
134 :
135 : ulong
136 : fd_url_unescape( char * const msg,
137 0 : ulong const len ) {
138 0 : char * end = msg+len;
139 0 : char * dst = msg;
140 0 : for( char * src=msg; src<end; src++ ) {
141 : /* invariant: dst<=src */
142 0 : if( FD_LIKELY( (*src)!='%' ) ) {
143 0 : *(dst++) = *src;
144 0 : } else {
145 0 : if( FD_UNLIKELY( src+2>=end ) ) return 0UL; /* truncated percent encoding */
146 0 : int hi = fd_hex_unhex( src[1] );
147 0 : int lo = fd_hex_unhex( src[2] );
148 0 : if( FD_UNLIKELY( (hi|lo)<0 ) ) return 0UL; /* invalid hex digit */
149 0 : *(dst++) = (char)( (hi<<4) | lo );
150 0 : src += 2;
151 0 : }
152 0 : }
153 0 : return (ulong)( dst-msg );
154 0 : }
|