/src/simdjson/include/simdjson/padded_string.h
Line | Count | Source |
1 | | #ifndef SIMDJSON_PADDED_STRING_H |
2 | | #define SIMDJSON_PADDED_STRING_H |
3 | | |
4 | | #include "simdjson/base.h" |
5 | | #include "simdjson/error.h" |
6 | | |
7 | | #include "simdjson/error-inl.h" |
8 | | |
9 | | #include <cstring> |
10 | | #include <memory> |
11 | | #include <string> |
12 | | #include <ostream> |
13 | | |
14 | | namespace simdjson { |
15 | | |
16 | | class padded_string_view; |
17 | | |
18 | | /** |
19 | | * String with extra allocation for ease of use with parser::parse() |
20 | | * |
21 | | * This is a move-only class, it cannot be copied. |
22 | | */ |
23 | | struct padded_string final { |
24 | | |
25 | | /** |
26 | | * Create a new, empty padded string. |
27 | | */ |
28 | | explicit inline padded_string() noexcept; |
29 | | /** |
30 | | * Create a new padded string buffer. |
31 | | * |
32 | | * @param length the size of the string. |
33 | | */ |
34 | | explicit inline padded_string(size_t length) noexcept; |
35 | | /** |
36 | | * Create a new padded string by copying the given input. |
37 | | * |
38 | | * @param data the buffer to copy |
39 | | * @param length the number of bytes to copy |
40 | | */ |
41 | | explicit inline padded_string(const char *data, size_t length) noexcept; |
42 | | #ifdef __cpp_char8_t |
43 | | explicit inline padded_string(const char8_t *data, size_t length) noexcept; |
44 | | #endif |
45 | | /** |
46 | | * Create a new padded string by copying the given input. |
47 | | * |
48 | | * @param str_ the string to copy |
49 | | */ |
50 | | inline padded_string(const std::string & str_ ) noexcept; |
51 | | /** |
52 | | * Create a new padded string by copying the given input. |
53 | | * |
54 | | * @param sv_ the string to copy |
55 | | */ |
56 | | inline padded_string(std::string_view sv_) noexcept; |
57 | | /** |
58 | | * Move one padded string into another. |
59 | | * |
60 | | * The original padded string will be reduced to zero capacity. |
61 | | * |
62 | | * @param o the string to move. |
63 | | */ |
64 | | inline padded_string(padded_string &&o) noexcept; |
65 | | /** |
66 | | * Move one padded string into another. |
67 | | * |
68 | | * The original padded string will be reduced to zero capacity. |
69 | | * |
70 | | * @param o the string to move. |
71 | | */ |
72 | | inline padded_string &operator=(padded_string &&o) noexcept; |
73 | | inline void swap(padded_string &o) noexcept; |
74 | | ~padded_string() noexcept; |
75 | | |
76 | | /** |
77 | | * The length of the string. |
78 | | * |
79 | | * Does not include padding. |
80 | | */ |
81 | | size_t size() const noexcept; |
82 | | |
83 | | /** |
84 | | * The length of the string. |
85 | | * |
86 | | * Does not include padding. |
87 | | */ |
88 | | size_t length() const noexcept; |
89 | | |
90 | | /** |
91 | | * The string data. |
92 | | **/ |
93 | | const char *data() const noexcept; |
94 | 0 | const uint8_t *u8data() const noexcept { return static_cast<const uint8_t*>(static_cast<const void*>(data_ptr));} |
95 | | |
96 | | /** |
97 | | * The string data. |
98 | | **/ |
99 | | char *data() noexcept; |
100 | | |
101 | | /** |
102 | | * Create a std::string_view with the same content. |
103 | | */ |
104 | | operator std::string_view() const; |
105 | | |
106 | | /** |
107 | | * Create a padded_string_view with the same content. |
108 | | */ |
109 | | operator padded_string_view() const noexcept; |
110 | | |
111 | | /** |
112 | | * Load this padded string from a file. |
113 | | * |
114 | | * ## Windows and Unicode |
115 | | * |
116 | | * Windows users who need to read files with non-ANSI characters in the |
117 | | * name should set their code page to UTF-8 (65001) before calling this |
118 | | * function. This should be the default with Windows 11 and better. |
119 | | * Further, they may use the AreFileApisANSI function to determine whether |
120 | | * the filename is interpreted using the ANSI or the system default OEM |
121 | | * codepage, and they may call SetFileApisToOEM accordingly. |
122 | | * |
123 | | * @return IO_ERROR on error. Be mindful that on some 32-bit systems, |
124 | | * the file size might be limited to 2 GB. |
125 | | * |
126 | | * @param path the path to the file. |
127 | | **/ |
128 | | inline static simdjson_result<padded_string> load(std::string_view path) noexcept; |
129 | | |
130 | | #if defined(_WIN32) && SIMDJSON_CPLUSPLUS17 |
131 | | /** |
132 | | * This function accepts a wide string path (UTF-16) and converts it to |
133 | | * UTF-8 before loading the file. This allows windows users to work |
134 | | * with unicode file paths without manually converting the paths every time. |
135 | | * |
136 | | * @return IO_ERROR on error, including conversion failures. |
137 | | * |
138 | | * @param path the path to the file as a wide string. |
139 | | **/ |
140 | | inline static simdjson_result<padded_string> load(std::wstring_view path) noexcept; |
141 | | #endif |
142 | | |
143 | | private: |
144 | | padded_string &operator=(const padded_string &o) = delete; |
145 | | padded_string(const padded_string &o) = delete; |
146 | | |
147 | | size_t viable_size{0}; |
148 | | char *data_ptr{nullptr}; |
149 | | |
150 | | }; // padded_string |
151 | | |
152 | | /** |
153 | | * Send padded_string instance to an output stream. |
154 | | * |
155 | | * @param out The output stream. |
156 | | * @param s The padded_string instance. |
157 | | * @throw if there is an error with the underlying output stream. simdjson itself will not throw. |
158 | | */ |
159 | 0 | inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } |
160 | | |
161 | | #if SIMDJSON_EXCEPTIONS |
162 | | /** |
163 | | * Send padded_string instance to an output stream. |
164 | | * |
165 | | * @param out The output stream. |
166 | | * @param s The padded_string instance. |
167 | | * @throw simdjson_error if the result being printed has an error. If there is an error with the |
168 | | * underlying output stream, that error will be propagated (simdjson_error will not be |
169 | | * thrown). |
170 | | */ |
171 | 0 | inline std::ostream& operator<<(std::ostream& out, simdjson_result<padded_string> &s) noexcept(false) { return out << s.value(); } |
172 | | #endif |
173 | | |
174 | | } // namespace simdjson |
175 | | |
176 | | // This is deliberately outside of simdjson so that people get it without having to use the namespace |
177 | | inline simdjson::padded_string operator ""_padded(const char *str, size_t len); |
178 | | #ifdef __cpp_char8_t |
179 | | inline simdjson::padded_string operator ""_padded(const char8_t *str, size_t len); |
180 | | #endif |
181 | | |
182 | | namespace simdjson { |
183 | | namespace internal { |
184 | | |
/**
 * Low-level function that allocates memory with padding, so that we can
 * safely read past the first "length" bytes. The padded_string class uses
 * it automatically.
 *
 * @param length the maximum size in bytes of the string.
 * @return the allocated buffer, or nullptr in case of error: the caller
 *         should check for a null pointer. The caller is responsible for
 *         freeing the memory (e.g., delete[] (...)).
 */
inline char *allocate_padded_buffer(size_t length) noexcept;
192 | | |
193 | | } // namespace internal |
194 | | } // namespace simdjson |
195 | | |
196 | | #endif // SIMDJSON_PADDED_STRING_H |