/src/crow/include/crow/query_string.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | #include <stdio.h> |
4 | | #include <string.h> |
5 | | #include <string> |
6 | | #include <vector> |
7 | | #include <unordered_map> |
8 | | #include <iostream> |
9 | | #include <memory> |
10 | | |
11 | | namespace crow |
12 | | { |
13 | | |
14 | | // ---------------------------------------------------------------------------- |
15 | | // qs_parse (modified) |
16 | | // https://github.com/bartgrantham/qs_parse |
17 | | // ---------------------------------------------------------------------------- |
18 | | /* Similar to strncmp, but handles URL-encoding for either string */ |
19 | | int qs_strncmp(const char* s, const char* qs, size_t n); |
20 | | |
21 | | |
22 | | /* Finds the beginning of each key/value pair and stores a pointer in qs_kv. |
23 | | * Also decodes the value portion of the k/v pair *in-place*. In a future |
24 | | * enhancement it will also have a compile-time option of sorting qs_kv |
25 | | * alphabetically by key. */ |
26 | | size_t qs_parse(char* qs, char* qs_kv[], size_t qs_kv_size, bool parse_url); |
27 | | |
28 | | |
29 | | /* Used by qs_parse to decode the value portion of a k/v pair */ |
30 | | int qs_decode(char * qs); |
31 | | |
32 | | |
33 | | /* Looks up the value according to the key on a pre-processed query string |
34 | | * A future enhancement will be a compile-time option to look up the key |
35 | | * in a pre-sorted qs_kv array via a binary search. */ |
36 | | //char * qs_k2v(const char * key, char * qs_kv[], int qs_kv_size); |
37 | | char * qs_k2v(const char * key, char * const * qs_kv, size_t qs_kv_size, int nth); |
38 | | |
39 | | |
40 | | /* Non-destructive lookup of value, based on key. User provides the |
41 | | * destination string and length. */ |
42 | | char * qs_scanvalue(const char * key, const char * qs, char * val, size_t val_len); |
43 | | |
// TODO: implement sorting of the qs_kv array; for now ensure it's not compiled
#undef _qsSORTING

// Character-class helpers used by the qs_* functions below. isxdigit _is_
// available in <ctype.h>, but these avoid pulling in another header.
// NOTE: every macro evaluates its argument several times, so only pass
// expressions without side effects.

// 1 if x is an ASCII hexadecimal digit, 0 otherwise.
#define CROW_QS_ISHEX(x) ((('0'<=(x)&&(x)<='9') || ('A'<=(x)&&(x)<='F') || ('a'<=(x)&&(x)<='f')) ? 1 : 0)
// Numeric value (0-15) of the hexadecimal digit x; 0 if x is not a hex digit.
#define CROW_QS_HEX2DEC(x) (('0'<=(x)&&(x)<='9') ? (x)-'0' : ('A'<=(x)&&(x)<='F') ? (x)-'A'+10 : ('a'<=(x)&&(x)<='f') ? (x)-'a'+10 : 0)
// 0 if x ends a key or value ('=', '#', '&' or NUL), 1 for any other character.
#define CROW_QS_ISQSCHR(x) ((((x)!='=')&&((x)!='#')&&((x)!='&')&&((x)!='\0')) ? 1 : 0)
51 | | |
/* Compares the key `s` with the key at the start of `qs`, URL-decoding both
 * sides ('+' becomes a space, "%XX" becomes the byte it encodes).
 *
 * At most `n` decoded characters are compared. A key ends at '=', '#', '&' or
 * the terminating NUL; a malformed or truncated "%" escape is treated as the
 * end of the key as well.
 *
 * Returns 0 when the keys are equal, the difference of the first mismatching
 * decoded characters otherwise, and -1 when `n` characters matched but the
 * key in `qs` continues. */
inline int qs_strncmp(const char * s, const char * qs, size_t n)
{
    const auto is_key_char = [](unsigned char c) {
        return !(c == '=' || c == '#' || c == '&' || c == '\0');
    };
    const auto is_hex = [](unsigned char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    };
    const auto hex_value = [](unsigned char c) -> int {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return c - 'a' + 10;
    };

    // Consumes one logical (decoded) character from `cursor`; yields '\0' at
    // the end of the key or on a bad escape.
    const auto next_char = [&](const char *& cursor) -> unsigned char {
        const unsigned char c = static_cast<unsigned char>(*cursor++);
        if (!is_key_char(c))
            return '\0';
        if (c == '+')
            return ' ';
        if (c != '%')
            return c;

        // Look at the second nibble only after the first one is known to be a
        // hex digit: a string that ends right after '%' must not be read past
        // its terminating NUL.
        const unsigned char upper = static_cast<unsigned char>(cursor[0]);
        if (!is_hex(upper))
            return '\0';
        const unsigned char lower = static_cast<unsigned char>(cursor[1]);
        if (!is_hex(lower))
            return '\0';
        cursor += 2;
        return static_cast<unsigned char>(hex_value(upper) * 16 + hex_value(lower));
    };

    while (n-- > 0)
    {
        const unsigned char u1 = next_char(s);
        const unsigned char u2 = next_char(qs);

        if (u1 != u2)
            return static_cast<int>(u1) - static_cast<int>(u2);
        if (u1 == '\0')
            return 0; // both keys ended at the same position
    }

    // `n` characters matched; the keys are equal only if qs's key ends here too.
    return is_key_char(static_cast<unsigned char>(*qs)) ? -1 : 0;
}
96 | | |
97 | | |
98 | | inline size_t qs_parse(char* qs, char* qs_kv[], size_t qs_kv_size, bool parse_url = true) |
99 | 0 | { |
100 | 0 | size_t i, j; |
101 | 0 | char * substr_ptr; |
102 | 0 |
|
103 | 0 | for(i=0; i<qs_kv_size; i++) qs_kv[i] = NULL; |
104 | 0 |
|
105 | 0 | // find the beginning of the k/v substrings or the fragment |
106 | 0 | substr_ptr = parse_url ? qs + strcspn(qs, "?#") : qs; |
107 | 0 | if (parse_url) |
108 | 0 | { |
109 | 0 | if (substr_ptr[0] != '\0') |
110 | 0 | substr_ptr++; |
111 | 0 | else |
112 | 0 | return 0; // no query or fragment |
113 | 0 | } |
114 | 0 |
|
115 | 0 | i=0; |
116 | 0 | while(i<qs_kv_size) |
117 | 0 | { |
118 | 0 | qs_kv[i] = substr_ptr; |
119 | 0 | j = strcspn(substr_ptr, "&"); |
120 | 0 | if ( substr_ptr[j] == '\0' ) { i++; break; } // x &'s -> means x iterations of this loop -> means *x+1* k/v pairs |
121 | 0 | substr_ptr += j + 1; |
122 | 0 | i++; |
123 | 0 | } |
124 | 0 |
|
125 | 0 | // we only decode the values in place, the keys could have '='s in them |
126 | 0 | // which will hose our ability to distinguish keys from values later |
127 | 0 | for(j=0; j<i; j++) |
128 | 0 | { |
129 | 0 | substr_ptr = qs_kv[j] + strcspn(qs_kv[j], "=&#"); |
130 | 0 | if ( substr_ptr[0] == '&' || substr_ptr[0] == '\0') // blank value: skip decoding |
131 | 0 | substr_ptr[0] = '\0'; |
132 | 0 | else |
133 | 0 | qs_decode(++substr_ptr); |
134 | 0 | } |
135 | 0 |
|
136 | 0 | #ifdef _qsSORTING |
137 | 0 | // TODO: qsort qs_kv, using qs_strncmp() for the comparison |
138 | 0 | #endif |
139 | 0 |
|
140 | 0 | return i; |
141 | 0 | } |
142 | | |
143 | | |
/* URL-decodes the value starting at `qs`, in place.
 *
 * Decoding stops at the first '=', '#', '&' or NUL. '+' becomes a space and
 * "%XX" becomes the byte it encodes; a '%' that is not followed by two hex
 * digits ends the value at that position. The result is NUL-terminated.
 *
 * Returns the length of the decoded value. */
inline int qs_decode(char * qs)
{
    const auto is_value_char = [](char c) {
        return !(c == '=' || c == '#' || c == '&' || c == '\0');
    };
    const auto is_hex = [](char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    };
    const auto hex_value = [](char c) -> int {
        if (c >= '0' && c <= '9') return c - 48;
        if (c >= 'A' && c <= 'F') return c - 55;
        if (c >= 'a' && c <= 'f') return c - 87;
        return 0;
    };

    int out = 0; // write position; never runs ahead of the read position
    for (int in = 0; is_value_char(qs[in]); ++in, ++out)
    {
        const char current = qs[in];
        if (current == '+')
        {
            qs[out] = ' ';
            continue;
        }
        if (current != '%')
        {
            qs[out] = current;
            continue;
        }

        // easier/safer than scanf
        if (!(is_hex(qs[in + 1]) && is_hex(qs[in + 2])))
        {
            qs[out] = '\0';
            return out;
        }
        qs[out] = static_cast<char>(hex_value(qs[in + 1]) * 16 + hex_value(qs[in + 2]));
        in += 2; // skip the two hex digits just consumed
    }
    qs[out] = '\0';

    return out;
}
171 | | |
172 | | |
173 | | inline char * qs_k2v(const char * key, char * const * qs_kv, size_t qs_kv_size, int nth = 0) |
174 | 0 | { |
175 | 0 | size_t i; |
176 | 0 | size_t key_len, skip; |
177 | 0 |
|
178 | 0 | key_len = strlen(key); |
179 | 0 |
|
180 | 0 | #ifdef _qsSORTING |
181 | 0 | // TODO: binary search for key in the sorted qs_kv |
182 | 0 | #else // _qsSORTING |
183 | 0 | for(i=0; i<qs_kv_size; i++) |
184 | 0 | { |
185 | 0 | // we rely on the unambiguous '=' to find the value in our k/v pair |
186 | 0 | if ( qs_strncmp(key, qs_kv[i], key_len) == 0 ) |
187 | 0 | { |
188 | 0 | skip = strcspn(qs_kv[i], "="); |
189 | 0 | if ( qs_kv[i][skip] == '=' ) |
190 | 0 | skip++; |
191 | 0 | // return (zero-char value) ? ptr to trailing '\0' : ptr to value |
192 | 0 | if(nth == 0) |
193 | 0 | return qs_kv[i] + skip; |
194 | 0 | else |
195 | 0 | --nth; |
196 | 0 | } |
197 | 0 | } |
198 | 0 | #endif // _qsSORTING |
199 | 0 |
|
200 | 0 | return nullptr; |
201 | 0 | } |
202 | | |
/* Finds the nth dictionary-style entry `dict_name[key]=value` in an array of
 * pairs produced by qs_parse().
 *
 * dict_name   name of the dictionary (the part before '[').
 * qs_kv       array of pointers to "key=value" entries.
 * qs_kv_size  number of entries in `qs_kv`.
 * nth         zero-based index, counted over the dictionary entries of
 *             `dict_name` only.
 *
 * An entry belongs to the dictionary only when `dict_name` is immediately
 * followed by '[' and a closing ']' appears before any '='. Plain entries
 * such as `name=x` and longer names such as `namespace[k]=v` are therefore
 * not reported for the dictionary `name`. Empty brackets yield an empty key.
 *
 * Returns the (key, value) pair, or nullptr when there is no such entry. */
inline std::unique_ptr<std::pair<std::string, std::string>> qs_dict_name2kv(const char * dict_name, char * const * qs_kv, size_t qs_kv_size, int nth = 0)
{
    typedef std::pair<std::string, std::string> kv_pair;

    const size_t name_len = strlen(dict_name);

#ifdef _qsSORTING
// TODO: binary search for key in the sorted qs_kv
#else  // _qsSORTING
    for (size_t i = 0; i < qs_kv_size; ++i)
    {
        const char* const entry = qs_kv[i];

        // The name must match exactly and be followed directly by '['.
        // (A successful strncmp guarantees entry has at least name_len chars.)
        if (strncmp(dict_name, entry, name_len) != 0 || entry[name_len] != '[')
            continue;

        // The key runs up to the first ']'; hitting '=' or the end of the
        // string first means the brackets are not part of the key.
        const size_t key_begin = name_len + 1;
        const size_t key_end = key_begin + strcspn(entry + key_begin, "]=");
        if (entry[key_end] != ']')
            continue;

        if (nth != 0)
        {
            --nth; // an earlier dictionary entry than the requested one
            continue;
        }

        // The value starts after the first '=' following the brackets; an
        // entry without '=' has an empty value.
        const char* value = entry + key_end + strcspn(entry + key_end, "=");
        if (*value == '=')
            ++value;

        return std::unique_ptr<kv_pair>(
          new kv_pair(std::string(entry + key_begin, key_end - key_begin), std::string(value)));
    }
#endif // _qsSORTING

    return nullptr;
}
244 | | |
245 | | |
246 | | inline char * qs_scanvalue(const char * key, const char * qs, char * val, size_t val_len) |
247 | 0 | { |
248 | 0 | size_t i, key_len; |
249 | 0 | const char * tmp; |
250 | 0 |
|
251 | 0 | // find the beginning of the k/v substrings |
252 | 0 | if ( (tmp = strchr(qs, '?')) != NULL ) |
253 | 0 | qs = tmp + 1; |
254 | 0 |
|
255 | 0 | key_len = strlen(key); |
256 | 0 | while(qs[0] != '#' && qs[0] != '\0') |
257 | 0 | { |
258 | 0 | if ( qs_strncmp(key, qs, key_len) == 0 ) |
259 | 0 | break; |
260 | 0 | qs += strcspn(qs, "&") + 1; |
261 | 0 | } |
262 | 0 |
|
263 | 0 | if ( qs[0] == '\0' ) return NULL; |
264 | 0 |
|
265 | 0 | qs += strcspn(qs, "=&#"); |
266 | 0 | if ( qs[0] == '=' ) |
267 | 0 | { |
268 | 0 | qs++; |
269 | 0 | i = strcspn(qs, "&=#"); |
270 | 0 | #ifdef _MSC_VER |
271 | 0 | strncpy_s(val, val_len, qs, (val_len - 1)<(i + 1) ? (val_len - 1) : (i + 1)); |
272 | 0 | #else |
273 | 0 | strncpy(val, qs, (val_len - 1)<(i + 1) ? (val_len - 1) : (i + 1)); |
274 | 0 | #endif |
275 | 0 | qs_decode(val); |
276 | 0 | } |
277 | 0 | else |
278 | 0 | { |
279 | 0 | if ( val_len > 0 ) |
280 | 0 | val[0] = '\0'; |
281 | 0 | } |
282 | 0 |
|
283 | 0 | return val; |
284 | 0 | } |
285 | | } |
286 | | // ---------------------------------------------------------------------------- |
287 | | |
288 | | |
289 | | namespace crow |
290 | | { |
291 | | struct request; |
292 | | /// A class to represent any data coming after the `?` in the request URL into key-value pairs. |
293 | | class query_string |
294 | | { |
295 | | public: |
296 | | static const int MAX_KEY_VALUE_PAIRS_COUNT = 256; |
297 | | |
298 | | query_string() = default; |
299 | | |
300 | | query_string(const query_string& qs): |
301 | | url_(qs.url_) |
302 | 0 | { |
303 | 0 | for (auto p : qs.key_value_pairs_) |
304 | 0 | { |
305 | 0 | key_value_pairs_.push_back((char*)(p - qs.url_.c_str() + url_.c_str())); |
306 | 0 | } |
307 | 0 | } |
308 | | |
309 | | query_string& operator=(const query_string& qs) |
310 | 0 | { |
311 | 0 | url_ = qs.url_; |
312 | 0 | key_value_pairs_.clear(); |
313 | 0 | for (auto p : qs.key_value_pairs_) |
314 | 0 | { |
315 | 0 | key_value_pairs_.push_back((char*)(p - qs.url_.c_str() + url_.c_str())); |
316 | 0 | } |
317 | 0 | return *this; |
318 | 0 | } |
319 | | |
320 | | query_string& operator=(query_string&& qs) noexcept |
321 | 0 | { |
322 | 0 | key_value_pairs_ = std::move(qs.key_value_pairs_); |
323 | 0 | char* old_data = (char*)qs.url_.c_str(); |
324 | 0 | url_ = std::move(qs.url_); |
325 | 0 | for (auto& p : key_value_pairs_) |
326 | 0 | { |
327 | 0 | p += (char*)url_.c_str() - old_data; |
328 | 0 | } |
329 | 0 | return *this; |
330 | 0 | } |
331 | | |
332 | | |
333 | | query_string(std::string params, bool url = true): |
334 | | url_(std::move(params)) |
335 | 0 | { |
336 | 0 | if (url_.empty()) |
337 | 0 | return; |
338 | 0 |
|
339 | 0 | key_value_pairs_.resize(MAX_KEY_VALUE_PAIRS_COUNT); |
340 | 0 | size_t count = qs_parse(&url_[0], &key_value_pairs_[0], MAX_KEY_VALUE_PAIRS_COUNT, url); |
341 | 0 |
|
342 | 0 | key_value_pairs_.resize(count); |
343 | 0 | key_value_pairs_.shrink_to_fit(); |
344 | 0 | } |
345 | | |
346 | | void clear() |
347 | 0 | { |
348 | 0 | key_value_pairs_.clear(); |
349 | 0 | url_.clear(); |
350 | 0 | } |
351 | | |
352 | | friend std::ostream& operator<<(std::ostream& os, const query_string& qs) |
353 | 0 | { |
354 | 0 | os << "[ "; |
355 | 0 | for (size_t i = 0; i < qs.key_value_pairs_.size(); ++i) |
356 | 0 | { |
357 | 0 | if (i) |
358 | 0 | os << ", "; |
359 | 0 | os << qs.key_value_pairs_[i]; |
360 | 0 | } |
361 | 0 | os << " ]"; |
362 | 0 | return os; |
363 | 0 | } |
364 | | |
365 | | /// Get a value from a name, used for `?name=value`. |
366 | | |
367 | | /// |
368 | | /// Note: this method returns the value of the first occurrence of the key only, to return all occurrences, see \ref get_list(). |
369 | | char* get(const std::string& name) const |
370 | 0 | { |
371 | 0 | char* ret = qs_k2v(name.c_str(), key_value_pairs_.data(), key_value_pairs_.size()); |
372 | 0 | return ret; |
373 | 0 | } |
374 | | |
375 | | /// Works similar to \ref get() except it removes the item from the query string. |
376 | | char* pop(const std::string& name) |
377 | 0 | { |
378 | 0 | char* ret = get(name); |
379 | 0 | if (ret != nullptr) |
380 | 0 | { |
381 | 0 | const std::string key_name = name + '='; |
382 | 0 | for (unsigned int i = 0; i < key_value_pairs_.size(); i++) |
383 | 0 | { |
384 | 0 | std::string str_item(key_value_pairs_[i]); |
385 | 0 | if (str_item.find(key_name)==0) |
386 | 0 | { |
387 | 0 | key_value_pairs_.erase(key_value_pairs_.begin() + i); |
388 | 0 | break; |
389 | 0 | } |
390 | 0 | } |
391 | 0 | } |
392 | 0 | return ret; |
393 | 0 | } |
394 | | |
395 | | /// Returns a list of values, passed as `?name[]=value1&name[]=value2&...name[]=valuen` with n being the size of the list. |
396 | | |
397 | | /// |
398 | | /// Note: Square brackets in the above example are controlled by `use_brackets` boolean (true by default). If set to false, the example becomes `?name=value1,name=value2...name=valuen` |
399 | | std::vector<char*> get_list(const std::string& name, bool use_brackets = true) const |
400 | 0 | { |
401 | 0 | std::vector<char*> ret; |
402 | 0 | std::string plus = name + (use_brackets ? "[]" : ""); |
403 | 0 | char* element = nullptr; |
404 | 0 |
|
405 | 0 | int count = 0; |
406 | 0 | while (1) |
407 | 0 | { |
408 | 0 | element = qs_k2v(plus.c_str(), key_value_pairs_.data(), key_value_pairs_.size(), count++); |
409 | 0 | if (!element) |
410 | 0 | break; |
411 | 0 | ret.push_back(element); |
412 | 0 | } |
413 | 0 | return ret; |
414 | 0 | } |
415 | | |
416 | | /// Similar to \ref get_list() but it removes the |
417 | | std::vector<char*> pop_list(const std::string& name, bool use_brackets = true) |
418 | 0 | { |
419 | 0 | std::vector<char*> ret = get_list(name, use_brackets); |
420 | 0 | const size_t name_len = name.length(); |
421 | 0 | if (!ret.empty()) |
422 | 0 | { |
423 | 0 | for (unsigned int i = 0; i < key_value_pairs_.size(); i++) |
424 | 0 | { |
425 | 0 | std::string str_item(key_value_pairs_[i]); |
426 | 0 | if (str_item.find(name)==0) { |
427 | 0 | if (use_brackets && str_item.find("[]=",name_len)==name_len) { |
428 | 0 | key_value_pairs_.erase(key_value_pairs_.begin() + i--); |
429 | 0 | } else if (!use_brackets && str_item.find('=',name_len)==name_len ) { |
430 | 0 | key_value_pairs_.erase(key_value_pairs_.begin() + i--); |
431 | 0 | } |
432 | 0 | } |
433 | 0 | } |
434 | 0 | } |
435 | 0 | return ret; |
436 | 0 | } |
437 | | |
438 | | /// Works similar to \ref get_list() except the brackets are mandatory must not be empty. |
439 | | |
440 | | /// |
441 | | /// For example calling `get_dict(yourname)` on `?yourname[sub1]=42&yourname[sub2]=84` would give a map containing `{sub1 : 42, sub2 : 84}`. |
442 | | /// |
443 | | /// if your query string has both empty brackets and ones with a key inside, use pop_list() to get all the values without a key before running this method. |
444 | | std::unordered_map<std::string, std::string> get_dict(const std::string& name) const |
445 | 0 | { |
446 | 0 | std::unordered_map<std::string, std::string> ret; |
447 | 0 |
|
448 | 0 | int count = 0; |
449 | 0 | while (1) |
450 | 0 | { |
451 | 0 | if (auto element = qs_dict_name2kv(name.c_str(), key_value_pairs_.data(), key_value_pairs_.size(), count++)) |
452 | 0 | ret.insert(*element); |
453 | 0 | else |
454 | 0 | break; |
455 | 0 | } |
456 | 0 | return ret; |
457 | 0 | } |
458 | | |
459 | | /// Works the same as \ref get_dict() but removes the values from the query string. |
460 | | std::unordered_map<std::string, std::string> pop_dict(const std::string& name) |
461 | 0 | { |
462 | 0 | const std::string name_value = name +'['; |
463 | 0 | std::unordered_map<std::string, std::string> ret = get_dict(name); |
464 | 0 | if (!ret.empty()) |
465 | 0 | { |
466 | 0 | for (unsigned int i = 0; i < key_value_pairs_.size(); i++) |
467 | 0 | { |
468 | 0 | std::string str_item(key_value_pairs_[i]); |
469 | 0 | if (str_item.find(name_value)==0) |
470 | 0 | { |
471 | 0 | key_value_pairs_.erase(key_value_pairs_.begin() + i--); |
472 | 0 | } |
473 | 0 | } |
474 | 0 | } |
475 | 0 | return ret; |
476 | 0 | } |
477 | | |
478 | | std::vector<std::string> keys() const |
479 | 0 | { |
480 | 0 | std::vector<std::string> keys; |
481 | 0 | keys.reserve(key_value_pairs_.size()); |
482 | 0 |
|
483 | 0 | for (const char* const element : key_value_pairs_) |
484 | 0 | { |
485 | 0 | const char* delimiter = strchr(element, '='); |
486 | 0 | if (delimiter) |
487 | 0 | keys.emplace_back(element, delimiter); |
488 | 0 | else |
489 | 0 | keys.emplace_back(element); |
490 | 0 | } |
491 | 0 |
|
492 | 0 | return keys; |
493 | 0 | } |
494 | | |
495 | | private: |
496 | | std::string url_; |
497 | | std::vector<char*> key_value_pairs_; |
498 | | }; |
499 | | |
500 | | } // namespace crow |