Coverage Report

Created: 2026-05-13 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/capnproto/c++/src/capnp/serialize.h
Line
Count
Source
1
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
2
// Licensed under the MIT License:
3
//
4
// Permission is hereby granted, free of charge, to any person obtaining a copy
5
// of this software and associated documentation files (the "Software"), to deal
6
// in the Software without restriction, including without limitation the rights
7
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
// copies of the Software, and to permit persons to whom the Software is
9
// furnished to do so, subject to the following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21
22
// This file implements a simple serialization format for Cap'n Proto messages.  The format
23
// is as follows:
24
//
25
// * 32-bit little-endian segment count, minus one (4 bytes).
26
// * 32-bit little-endian size of each segment, in 8-byte words (4*(segment count) bytes).
27
// * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes).  (I.e., if there are an even
28
//     number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
29
// * Data from each segment, in order (8*sum(segment sizes) bytes)
30
//
31
// This format has some important properties:
32
// - It is self-delimiting, so multiple messages may be written to a stream without any external
33
//   delimiter.
34
// - The total size and position of each segment can be determined by reading only the first part
35
//   of the message, allowing lazy and random-access reading of the segment data.
36
// - A message is always at least 8 bytes.
37
// - A single-segment message can be read entirely in two system calls with no buffering.
38
// - A multi-segment message can be read entirely in three system calls with no buffering.
39
// - The format is appropriate for mmap()ing since all data is aligned.
40
41
#pragma once
42
43
#include "message.h"
44
#include <kj/io.h>
45
46
CAPNP_BEGIN_HEADER
47
48
namespace capnp {
49
50
class FlatArrayMessageReader: public MessageReader {
51
  // A MessageReader that parses a message held in a single flat array.  It makes sense to use
52
  // this together with mmap() for extremely fast parsing.
53
54
public:
55
  FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
56
  // The array must remain valid until the MessageReader is destroyed.
57
  //
58
  // `array` MUST be aligned, or this is likely to throw.
59
60
  FlatArrayMessageReader(kj::ArrayPtr<const byte> bytes, ReaderOptions options = ReaderOptions());
61
  // Reads from a byte array instead of a word array. If the bytes are not aligned, a copy will
62
  // be made.
63
64
  kj::ArrayPtr<const word> getSegment(uint id) override;  // implements MessageReader
65
66
0
  const word* getEnd() const { return end; }
67
  // Returns a pointer just past the end of the message, as determined by reading the message
68
  // header.  This may be before the end of the input array.  The pointer is useful e.g. when
69
  // you know that the input array has extra data appended after the message and you want to
70
  // get at it.
71
72
private:
73
  // Optimize for single-segment case.
74
  kj::ArrayPtr<const word> segment0;
75
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
76
  const word* end;  // Value returned by getEnd().
77
78
  kj::Array<word> alignedCopy;  // NOTE(review): presumably the copy made for unaligned byte input -- confirm.
79
80
  void init(kj::ArrayPtr<const word> array);
81
};
82
83
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
84
    kj::ArrayPtr<const word> array, MessageBuilder& target,
85
    ReaderOptions options = ReaderOptions());
86
// Convenience function which reads a message using `FlatArrayMessageReader` then copies the
87
// content into the target `MessageBuilder`, verifying that the message structure is valid
88
// (although not necessarily that it matches the desired schema).
89
//
90
// Returns an ArrayPtr containing any words left over in the array after consuming the whole
91
// message. This is useful when reading multiple messages that have been concatenated. See also
92
// FlatArrayMessageReader::getEnd().
93
//
94
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
95
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
96
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
97
98
kj::Array<word> messageToFlatArray(MessageBuilder& builder);
99
// Constructs a flat array containing the entire content of the given message.
100
//
101
// To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
102
// `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
103
// deleted. For example:
104
//
105
//     kj::Array<capnp::word> words = messageToFlatArray(myMessage);
106
//     kj::ArrayPtr<kj::byte> bytes = words.asBytes();
107
//     write(fd, bytes.begin(), bytes.size());
108
109
kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
110
// Version of messageToFlatArray that takes a raw segment array.
111
112
size_t computeSerializedSizeInWords(MessageBuilder& builder);
113
// Returns the size, in words, that will be needed to serialize the message, including the header.
114
115
size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
116
// Version of computeSerializedSizeInWords that takes a raw segment array.
117
118
size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
119
// Given a prefix of a serialized message, try to determine the expected total size of the message,
120
// in words. The returned size is based on the information known so far; it may be an underestimate
121
// if the prefix doesn't contain the full segment table.
122
//
123
// If the returned value is greater than `messagePrefix.size()`, then the message is not yet
124
// complete and the app cannot parse it yet. If the returned value is less than or equal to
125
// `messagePrefix.size()`, then the returned value is the exact total size of the message; any
126
// remaining bytes are part of the next message.
127
//
128
// This function is useful when reading messages from a stream in an asynchronous way, but when
129
// using the full KJ async infrastructure would be too difficult. Each time bytes are received,
130
// use this function to determine if an entire message is ready to be parsed.
131
132
kj::Array<word> serializeSegmentTable(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
133
// Returns the segment table for the given message segments.
134
// A fully serialized message consists of the segment table followed by the segments, written consecutively.
135
136
// =======================================================================================
137
138
class InputStreamMessageReader: public MessageReader {
139
  // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
140
  // for a subclass specific to file descriptors.
141
142
public:
143
  InputStreamMessageReader(kj::InputStream& inputStream,
144
                           ReaderOptions options = ReaderOptions(),
145
                           kj::ArrayPtr<word> scratchSpace = nullptr);  // scratch buffer is optional
146
  ~InputStreamMessageReader() noexcept(false);  // noexcept(false): this destructor is allowed to throw
147
148
  // implements MessageReader ----------------------------------------
149
  kj::ArrayPtr<const word> getSegment(uint id) override;
150
151
private:
152
  kj::InputStream& inputStream;
153
  byte* readPos;
154
155
  // Optimize for single-segment case.
156
  kj::ArrayPtr<const word> segment0;
157
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
158
159
  kj::Array<word> ownedSpace;
160
  // Used only if the caller-provided scratchSpace wasn't big enough.
161
162
  kj::UnwindDetector unwindDetector;  // NOTE(review): presumably consulted by the throwing destructor -- confirm.
163
};
164
165
void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
166
                     ReaderOptions options = ReaderOptions(),
167
                     kj::ArrayPtr<word> scratchSpace = nullptr);
168
// Convenience function which reads a message using `InputStreamMessageReader` then copies the
169
// content into the target `MessageBuilder`, verifying that the message structure is valid
170
// (although not necessarily that it matches the desired schema).
171
//
172
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
173
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
174
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
175
176
void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
177
// Write the message to the given output stream.
178
179
void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
180
// Write the segment array to the given output stream.
181
182
// =======================================================================================
183
// Specializations for reading from / writing to file descriptors.
184
185
class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
186
  // A MessageReader that reads from a stream-based file descriptor. FdInputStream is the first base, so it is constructed before being passed (as *this) to InputStreamMessageReader.
187
188
public:
189
  StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
190
                        kj::ArrayPtr<word> scratchSpace = nullptr)
191
0
      : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
192
  // Read a message from a file descriptor, without taking ownership of the descriptor.
193
194
  StreamFdMessageReader(kj::OwnFd fd, ReaderOptions options = ReaderOptions(),
195
                        kj::ArrayPtr<word> scratchSpace = nullptr)
196
0
      : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
197
  // Read a message from a file descriptor, taking ownership of the descriptor.
198
199
  ~StreamFdMessageReader() noexcept(false);  // noexcept(false): this destructor is allowed to throw
200
};
201
202
void readMessageCopyFromFd(int fd, MessageBuilder& target,
203
                           ReaderOptions options = ReaderOptions(),
204
                           kj::ArrayPtr<word> scratchSpace = nullptr);
205
// Convenience function which reads a message using `StreamFdMessageReader` then copies the
206
// content into the target `MessageBuilder`, verifying that the message structure is valid
207
// (although not necessarily that it matches the desired schema).
208
//
209
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
210
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
211
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
212
213
void writeMessageToFd(int fd, MessageBuilder& builder);
214
// Write the message to the given file descriptor.
215
//
216
// This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
217
// you catch this exception at the call site.  If throwing an exception is not acceptable, you
218
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().
219
220
void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
221
// Write the segment array to the given file descriptor.
222
//
223
// This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
224
// you catch this exception at the call site.  If throwing an exception is not acceptable, you
225
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().
226
227
// =======================================================================================
228
// inline stuff
229
230
0
inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {  // Builder convenience overload.
231
0
  return messageToFlatArray(builder.getSegmentsForOutput());  // Forward to the segment-array overload.
232
0
}
233
234
0
inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {  // Builder convenience overload.
235
0
  return computeSerializedSizeInWords(builder.getSegmentsForOutput());  // Forward to the segment-array overload.
236
0
}
237
238
0
inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {  // Builder convenience overload.
239
0
  writeMessage(output, builder.getSegmentsForOutput());  // Forward to the segment-array overload.
240
0
}
241
242
0
inline void writeMessageToFd(int fd, MessageBuilder& builder) {  // Builder convenience overload.
243
0
  writeMessageToFd(fd, builder.getSegmentsForOutput());  // Forward to the segment-array overload.
244
0
}
245
246
}  // namespace capnp
247
248
CAPNP_END_HEADER