/src/capnproto/c++/src/capnp/serialize.h
Line | Count | Source |
1 | | // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors |
2 | | // Licensed under the MIT License: |
3 | | // |
4 | | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | | // of this software and associated documentation files (the "Software"), to deal |
6 | | // in the Software without restriction, including without limitation the rights |
7 | | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | | // copies of the Software, and to permit persons to whom the Software is |
9 | | // furnished to do so, subject to the following conditions: |
10 | | // |
11 | | // The above copyright notice and this permission notice shall be included in |
12 | | // all copies or substantial portions of the Software. |
13 | | // |
14 | | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
20 | | // THE SOFTWARE. |
21 | | |
22 | | // This file implements a simple serialization format for Cap'n Proto messages. The format |
23 | | // is as follows: |
24 | | // |
25 | | // * 32-bit little-endian segment count, minus one (4 bytes). |
26 | | // * 32-bit little-endian size of each segment, in words (4*(segment count) bytes). |
27 | | // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even |
28 | | // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.) |
29 | | // * Data from each segment, in order (8*sum(segment sizes) bytes) |
30 | | // |
31 | | // This format has some important properties: |
32 | | // - It is self-delimiting, so multiple messages may be written to a stream without any external |
33 | | // delimiter. |
34 | | // - The total size and position of each segment can be determined by reading only the first part |
35 | | // of the message, allowing lazy and random-access reading of the segment data. |
36 | | // - A message is always at least 8 bytes. |
37 | | // - A single-segment message can be read entirely in two system calls with no buffering. |
38 | | // - A multi-segment message can be read entirely in three system calls with no buffering. |
39 | | // - The format is appropriate for mmap()ing since all data is aligned. |
40 | | |
41 | | #pragma once |
42 | | |
43 | | #include "message.h" |
44 | | #include <kj/io.h> |
45 | | |
46 | | CAPNP_BEGIN_HEADER |
47 | | |
48 | | namespace capnp { |
49 | | |
50 | | class FlatArrayMessageReader: public MessageReader { |
51 | | // Parses a message held in a flat array. It makes sense to combine this with mmap() for |
52 | | // extremely fast parsing. |
53 | | |
54 | | public: |
55 | | FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions()); |
56 | | // `array` must remain valid until this MessageReader is destroyed. |
57 | | // |
58 | | // `array` MUST be properly aligned, or this is likely to throw. |
59 | | |
60 | | FlatArrayMessageReader(kj::ArrayPtr<const byte> bytes, ReaderOptions options = ReaderOptions()); |
61 | | // Same as above, but reads from a byte array instead of a word array. If `bytes` is not |
62 | | // aligned, a copy will be made. |
63 | | |
64 | | kj::ArrayPtr<const word> getSegment(uint id) override; |
65 | | |
66 | 0 | const word* getEnd() const { return end; } |
67 | | // Returns a pointer just past the end of the message, as determined by reading the message |
68 | | // header. This may be before the end of the input array. The pointer is useful e.g. if you |
69 | | // know that the input array has extra data appended after the message and you want to get |
70 | | // at it. |
71 | | |
72 | | private: |
73 | | // Optimize for the single-segment case. |
74 | | kj::ArrayPtr<const word> segment0; |
75 | | kj::Array<kj::ArrayPtr<const word>> moreSegments; |
76 | | const word* end; |
77 | | |
78 | | kj::Array<word> alignedCopy; |
79 | | |
80 | | void init(kj::ArrayPtr<const word> array); |
81 | | }; |
82 | | |
83 | | kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy( |
84 | | kj::ArrayPtr<const word> array, MessageBuilder& target, |
85 | | ReaderOptions options = ReaderOptions()); |
86 | | // Convenience function which reads a message from `array` using `FlatArrayMessageReader`, then |
87 | | // copies the content into the `target` MessageBuilder, verifying that the message structure is |
88 | | // valid (although not necessarily that it matches the desired schema). |
89 | | // |
90 | | // Returns an ArrayPtr containing any words left over in `array` after consuming the whole |
91 | | // message. This is useful when reading multiple messages that have been concatenated. See also |
92 | | // FlatArrayMessageReader::getEnd(). |
93 | | // |
94 | | // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one |
95 | | // of `MessageBuilder`'s constructors. However, that approach skips the validation step and is not |
96 | | // safe to use on untrusted input. Therefore, we do not provide a convenience function for it.) |
97 | | |
98 | | kj::Array<word> messageToFlatArray(MessageBuilder& builder); |
99 | | // Constructs a flat array containing the entire content of the given message. |
100 | | // |
101 | | // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that |
102 | | // `asBytes()` returns an ArrayPtr, so you must keep the Array itself alive as well to prevent |
103 | | // the bytes from being deleted. For example: |
104 | | // |
105 | | // kj::Array<capnp::word> words = messageToFlatArray(myMessage); |
106 | | // kj::ArrayPtr<kj::byte> bytes = words.asBytes(); |
107 | | // write(fd, bytes.begin(), bytes.size()); |
108 | | |
109 | | kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); |
110 | | // Overload of messageToFlatArray that takes a raw segment array instead of a MessageBuilder. |
111 | | |
112 | | size_t computeSerializedSizeInWords(MessageBuilder& builder); |
113 | | // Returns the number of words needed to serialize the given message, including the header. |
114 | | |
115 | | size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); |
116 | | // Overload of computeSerializedSizeInWords that takes a raw segment array. |
117 | | |
118 | | size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix); |
119 | | // Given a prefix of a serialized message, try to determine the expected total size of the message, |
120 | | // in words. The returned size is based on the information known so far; it may be an underestimate |
121 | | // if the prefix doesn't contain the full segment table. |
122 | | // |
123 | | // If the returned value is greater than `messagePrefix.size()`, then the message is not yet |
124 | | // complete and the app cannot parse it yet. If the returned value is less than or equal to |
125 | | // `messagePrefix.size()`, then the returned value is the exact total size of the message; any |
126 | | // remaining words are part of the next message. |
127 | | // |
128 | | // This function is useful when reading messages from a stream in an asynchronous way, but when |
129 | | // using the full KJ async infrastructure would be too difficult. Each time data is received, |
130 | | // use this function to determine if an entire message is ready to be parsed. |
131 | | |
132 | | kj::Array<word> serializeSegmentTable(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); |
133 | | // Returns the segment table for the given message segments. |
134 | | // A fully serialized message consists of this table followed by the segments, written consecutively. |
135 | | |
136 | | // ======================================================================================= |
137 | | |
138 | | class InputStreamMessageReader: public MessageReader { |
139 | | // A MessageReader that reads a message from an abstract kj::InputStream. See also |
140 | | // StreamFdMessageReader for a subclass specific to file descriptors. |
141 | | |
142 | | public: |
143 | | InputStreamMessageReader(kj::InputStream& inputStream, |
144 | | ReaderOptions options = ReaderOptions(), |
145 | | kj::ArrayPtr<word> scratchSpace = nullptr); |
146 | | ~InputStreamMessageReader() noexcept(false); |
147 | | |
148 | | // implements MessageReader ---------------------------------------- |
149 | | kj::ArrayPtr<const word> getSegment(uint id) override; |
150 | | |
151 | | private: |
152 | | kj::InputStream& inputStream; |
153 | | byte* readPos; |
154 | | |
155 | | // Optimize for the single-segment case. |
156 | | kj::ArrayPtr<const word> segment0; |
157 | | kj::Array<kj::ArrayPtr<const word>> moreSegments; |
158 | | |
159 | | kj::Array<word> ownedSpace; |
160 | | // Populated only if the caller's scratchSpace wasn't big enough. |
161 | | |
162 | | kj::UnwindDetector unwindDetector; |
163 | | }; |
164 | | |
165 | | void readMessageCopy(kj::InputStream& input, MessageBuilder& target, |
166 | | ReaderOptions options = ReaderOptions(), |
167 | | kj::ArrayPtr<word> scratchSpace = nullptr); |
168 | | // Convenience function which reads a message from `input` using `InputStreamMessageReader`, then |
169 | | // copies the content into the `target` MessageBuilder, verifying that the message structure is |
170 | | // valid (although not necessarily that it matches the desired schema). |
171 | | // |
172 | | // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one |
173 | | // of `MessageBuilder`'s constructors. However, that approach skips the validation step and is not |
174 | | // safe to use on untrusted input. Therefore, we do not provide a convenience function for it.) |
175 | | |
176 | | void writeMessage(kj::OutputStream& output, MessageBuilder& builder); |
177 | | // Writes the message held by `builder` to the given output stream. |
178 | | |
179 | | void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); |
180 | | // Overload of writeMessage that takes a raw segment array instead of a MessageBuilder. |
181 | | |
182 | | // ======================================================================================= |
183 | | // Specializations for reading from / writing to file descriptors. |
184 | | |
185 | | class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader { |
186 | | // A MessageReader that reads from a stream-based file descriptor via its own FdInputStream base. |
187 | | |
188 | | public: |
189 | | StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(), |
190 | | kj::ArrayPtr<word> scratchSpace = nullptr) |
191 | 0 | : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {} |
192 | | // Read a message from a file descriptor, without taking ownership of the descriptor. |
193 | | |
194 | | StreamFdMessageReader(kj::OwnFd fd, ReaderOptions options = ReaderOptions(), |
195 | | kj::ArrayPtr<word> scratchSpace = nullptr) |
196 | 0 | : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {} |
197 | | // Read a message from a file descriptor, taking ownership of the descriptor. |
198 | | |
199 | | ~StreamFdMessageReader() noexcept(false); |
200 | | }; |
201 | | |
202 | | void readMessageCopyFromFd(int fd, MessageBuilder& target, |
203 | | ReaderOptions options = ReaderOptions(), |
204 | | kj::ArrayPtr<word> scratchSpace = nullptr); |
205 | | // Convenience function which reads a message from `fd` using `StreamFdMessageReader`, then |
206 | | // copies the content into the `target` MessageBuilder, verifying that the message structure is |
207 | | // valid (although not necessarily that it matches the desired schema). |
208 | | // |
209 | | // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one |
210 | | // of `MessageBuilder`'s constructors. However, that approach skips the validation step and is not |
211 | | // safe to use on untrusted input. Therefore, we do not provide a convenience function for it.) |
212 | | |
213 | | void writeMessageToFd(int fd, MessageBuilder& builder); |
214 | | // Writes the message held by `builder` to the given file descriptor. |
215 | | // |
216 | | // This function throws an exception on any I/O error. If your code is not exception-safe, be sure |
217 | | // you catch this exception at the call site. If throwing an exception is not acceptable, you |
218 | | // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). |
219 | | |
220 | | void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); |
221 | | // Overload of writeMessageToFd that takes a raw segment array instead of a MessageBuilder. |
222 | | // |
223 | | // This function throws an exception on any I/O error. If your code is not exception-safe, be sure |
224 | | // you catch this exception at the call site. If throwing an exception is not acceptable, you |
225 | | // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). |
226 | | |
227 | | // ======================================================================================= |
228 | | // inline stuff: each MessageBuilder overload forwards to its raw-segment-array overload |
229 | | |
230 | 0 | inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) { |
231 | 0 | return messageToFlatArray(builder.getSegmentsForOutput()); |
232 | 0 | } |
233 | | |
234 | 0 | inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { |
235 | 0 | return computeSerializedSizeInWords(builder.getSegmentsForOutput()); |
236 | 0 | } |
237 | | |
238 | 0 | inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { |
239 | 0 | writeMessage(output, builder.getSegmentsForOutput()); |
240 | 0 | } |
241 | | |
242 | 0 | inline void writeMessageToFd(int fd, MessageBuilder& builder) { |
243 | 0 | writeMessageToFd(fd, builder.getSegmentsForOutput()); |
244 | 0 | } |
245 | | |
246 | | } // namespace capnp |
247 | | |
248 | | CAPNP_END_HEADER |