Line data Source code
1 : /* Generate prototypes, inlines and/or implementations for concurrent
2 : persistent shared maps based on chaining. A map can store a
3 : practically unbounded number of elements. If sized on creation for
4 : the maximum number of mapped elements, typical map operations are a
5 : fast O(1) time and map element overhead is a small O(1) space.
6 : Further, large numbers of composite map operations can be done
7 : concurrently with very low risk of conflicts.
8 :
9 : In the current implementation, each map chain has a version number.
10 : Operations that require changing a chain's connectivity (e.g.
11 : inserting or removing an element from a chain) or modifying an
12 : element managed by that chain, the chain's version number is
13 : increased by one (atomic fetch-and-or based) such that other
14 : potential users of keys managed by that chain detect and react
15 : appropriately to a potentially concurrent conflicting operation in
16 : progress. When an operation completes, the chain version number is
17 : increased by one again to notify other users the operation is no
18 : longer in progress and that the set of keys managed by that chain
19 : and/or values associated with those keys has potentially changed
20 : since the previous version. For example, lockfree queries can
21 : interoperate with this via a zero-copy
22 : try-speculatively-process-then-test pattern similar to that used in
23 : fd_tango for high throughput message processing.
24 :
25 : As such, there can be an arbitrary number of concurrent readers
26 : processing map keys. These readers will not interfere with each
27 : other and will not block any concurrent insert / remove / modify
28 : operations. Insert / remove / modify operations can potentially
29 : block each other. Since there are typically O(1) keys per chain, the
30 : probability of concurrent insert / remove / modify operations
31 : involving different keys blocking each other is small. Further, this
32 : controllable a priori by provisioning the number of chains
33 : appropriately. Concurrent operations on the same key are serialized
34 : (as they necessarily would be any implementation). Since the
35 : operations are HPC implementations, collisions are resolved as fast
36 : as is practical. The upshot is that the map supports massive
37 : concurrency while preserving concurrent operation serializability.
38 :
39 : Version numbers are stored with chain head pointers such that the
40 : cache traffic required for managing chain versioning is covered by
41 : the same cache traffic required for managing chains in a
42 : non-concurrent implementation (e.g. fd_map_chain). Operations do
43 : many internal integrity checking / bounds checking for use in high
44 : reliability applications.
45 :
46 : Lastly, fine grained versioning allows for concurrent execution of
47 : complex operations involving multiple keys simultaneously. This
48 : allows using the map as a concurrent transactional memory and for
49 : serialization of all map elements at a consistent point in time while
50 : minimizing impact on ongoing concurrent operations (e.g. snapshotting
51 : all the elements in the map).
52 :
53 : The main drawback of chain versioning is the extra memory footprint
54 : required for chain metadata storage. The current implementation
55 : supports indexing compression and uses atomic bit field techniques to
56 : minimize this overhead.
57 :
58 : Concurrent operation requires FD_HAS_ATOMIC. This will still work on
59 : platforms without FD_HAS_ATOMIC but concurrent operations will not be
60 : safe.
61 :
62 : In short, if you need a concurrent map, this is a lot better than
63 : protecting a non-concurrent implementation with a global lock. And,
64 : if you don't, it will be comparably performant to a non-concurrent
65 : implementation.
66 :
67 : This generator is designed for ultra tight coupling with pools,
68 : treaps, heaps, lists, other maps, etc. Likewise, a map can be
69 : persisted beyond the lifetime of the creating process, be used
70 : inter-process, relocated in memory, be naively
71 : serialized/deserialized, be moved between hosts, use index
72 : compression for cache and memory bandwidth efficiency, etc.
73 : Concurrency and flexibility are prioritized.
74 :
75 : Typical usage:
76 :
77 : struct myele {
78 : ulong key; // Technically "MAP_KEY_T MAP_KEY" (default is ulong key), managed by mymap when the element is in the mymap
79 : ulong next; // Technically "MAP_IDX_T MAP_NEXT" (default is ulong next), managed by mymap when the element is in the mymap
80 :
81 : ... key and next can be located arbitrarily in the element and
82 : ... can be reused for other purposes when the element is not in a
83 : ... mymap. The mapping of a key to an element in the element
84 : ... store is arbitrary. An element should not be moved /
85 : ... released from the element store while in the mymap.
86 :
87 : };
88 :
89 : typedef struct myele myele_t;
90 :
91 : #define MAP_NAME mymap
92 : #define MAP_ELE_T myele_t
93 : #include "tmpl/fd_map_chain_para.c"
94 :
95 : will declare the following APIs as a header only style library in the
96 : compilation unit:
97 :
98 : // A mymap_t is a stack declaration friendly quasi-opaque local
99 : // object used to hold the state of a local join to a mymap.
100 : // Similarly, a mymap_query_t and a mymap_iter_t hold the local
101 : // state of an ongoing local query and local iteration
102 : // respectively. E.g. it is fine to do mymap_t join[1];" to
103 : // allocate a mymap_t but the contents should not be used directly.
104 :
105 : typedef struct mymap_private mymap_t;
106 : typedef struct mymap_query_private mymap_query_t;
107 : typedef struct mymap_iter_private mymap_iter_t;
108 :
109 : // mymap_ele_max_max returns the maximum element store capacity
110 : // compatible with a mymap.
111 :
112 : ulong mymap_ele_max_max( void );
113 :
114 : // mymap_chain_max returns the maximum number of chains supported
115 : // by a mymap. Will be an integer power-of-two.
116 :
117 : ulong mymap_chain_max( void );
118 :
119 : // mymap_chain_cnt_est returns a reasonable number of chains to use
120 : // for a map that is expected to hold up to ele_max_est elements.
121 : // ele_max_est will be clamped to be in [1,mymap_ele_max_max()] and
122 : // the return value will be an integer power-of-two in
123 : // [1,mymap_chain_max()].
124 :
125 : ulong mymap_chain_cnt_est( ulong ele_max_est );
126 :
127 : // mymap_{align,footprint} returns the alignment and footprint
128 : // needed for a memory region to be used as a mymap. align will be
129 : // an integer power-of-two and footprint will be a multiple of
130 : // align. footprint returns 0 if chain_cnt is not an integer
131 : // power-of-two in [1,mymap_chain_max()].
132 : //
133 : // mymap_new formats a memory region with the required alignment
134 : // and footprint into a mymap. shmem points in the caller's
135 : // address space to the memory region to use. Returns shmem on
136 : // success (mymap has ownership of the memory region) and NULL on
137 : // failure (no changes, logs details). The caller is not joined on
138 : // return. The mymap will be empty with all map chains at version
139 : // 0 (unlocked).
140 : //
141 : // mymap_join joins the caller to an existing mymap. ljoin points
142 : // to a mymap_t compatible memory region in the caller's address
143 : // space, shmap points in the caller's address space to the memory
144 : // region containing the mymap, shele points in the caller's
145 : // address space to mymap's element store and ele_max gives the
146 : // element store's capacity in [0,ele_max_max]. Returns a handle
147 : // to the caller's local join on success (join has ownership of the
148 : // ljoin region) and NULL on failure (no changes, logs details).
149 : //
150 : // mymap_leave leaves a mymap join. join points to a current local
151 : // join. Returns the memory region used for the join on success
152 : // (caller has ownership on return and the caller is no longer
153 : // joined) and NULL on failure (no changes, logs details). Use the
154 : // join accessors before leaving to get shmap, shele and ele_max
155 : // used by the join if needed.
156 : //
157 : // mymap_delete unformats a memory region used as a mymap. Assumes
158 : // shmap points in the caller's address space to a memory region
159 : // containing the mymap and that there are no joins. Returns shmem
160 : // on success (caller has ownership of the memory region, any
161 : // remaining elements still in the mymap are released to the caller
162 : // implicitly) and NULL on failure (no changes, logs details).
163 :
164 : ulong mymap_align ( void );
165 : ulong mymap_footprint( ulong chain_cnt );
166 : void * mymap_new ( void * shmem, ulong chain_cnt, ulong seed );
167 : mymap_t * mymap_join ( void * ljoin, void * shmap, void * shele, ulong ele_max );
168 : void * mymap_leave ( mymap_t * join );
169 : void * mymap_delete ( void * shmap );
170 :
171 : // mymap_{chain_cnt,seed} return the mymap configuration. Assumes
172 : // join is a current local join. The values will be valid for the
173 : // mymap lifetime.
174 :
175 : ulong mymap_chain_cnt( mymap_t const * join );
176 : ulong mymap_seed ( mymap_t const * join );
177 :
178 : // mymap_{shmap,shele,ele_max} return join details. Assumes join
179 : // is a current local join. The values will be valid for the join
180 : // lifetime. mymap_{shmap_const,shele_const} are const correct
181 : // versions.
182 :
183 : void const * mymap_shmap_const( mymap_t const * join );
184 : void const * mymap_shele_const( mymap_t const * join );
185 : ulong mymap_ele_max ( mymap_t const * join );
186 :
187 : void * mymap_shmap( mymap_t * join );
188 : void * mymap_shele( mymap_t * join );
189 :
190 : // mymap_key_{eq,hash} expose the provided MAP_KEY_{EQ,HASH} macros
191 : // as inlines with strict semantics. They assume that the provided
192 : // pointers are in the caller's address space to keys that will not
193 : // be changed during the call. They retain no interest in any keys
194 : // on return.
195 : //
196 : // mymap_key_eq returns 1 if *k0 and *k1 are equal and 0 otherwise.
197 : //
198 : // mymap_key_hash returns the hash of *key using the hash function
199 : // seed. Should ideally be a random mapping from a MAP_KEY_T to a
200 : // ulong but this depends on what the user actually used for
201 : // MAP_KEY_HASH. The seed used by a particular mymap instance can
202 : // be obtained above.
203 :
204 : int mymap_key_eq ( ulong * k0, ulong * k1 );
205 : ulong mymap_key_hash( ulong * key, ulong seed );
206 :
207 : // mymap_backoff does FD_SPIN_PAUSE a random number of times. The
208 : // number of pauses is an approximately uniform IID random number
209 : // in [0,scale/2^16] where scale is in [0,2^32). Specifically, the
210 : // number of pauses is:
211 : //
212 : // floor( scale r / 2^48 )
213 : //
214 : // where r is a non-deterministic 32-bit uniform IID random number.
215 : // Under the hood, r is generated by hashing the user provided seed
216 : // and the least significant 32-bits of the CPU tickcounter.
217 : // Ideally, seed is a 32-bit globally unique identifier for the
218 : // logical thread of execution but this is up to the application to
219 : // specify and rarely matters in practice. This is a useful
220 : // building block for random exponential backoffs.
221 :
222 : void mymap_backoff( ulong scale, ulong seed );
223 :
224 : // mymap_query_memo returns the key_hash of the query associated
225 : // with the query's key to allow users to minimize potentially
226 : // expensive key hash computations in various operations.
227 : //
228 : // mymap_query_ele returns a pointer in the caller's address space
229 : // to the element store element associated with the query or a
230 : // sentinel value. The sentinel value is application dependent and
231 : // thus arbitrary (e.g. not necessarily in the element store,
232 : // including NULL, a local temporary used as a bit bucket, etc).
233 : // Assumes query is valid. The lifetime of the returned pointer
234 : // depends on the query. mymap_query_ele_const is a const correct
235 : // version.
236 :
237 : ulong mymap_query_memo ( mymap_query_t const * query );
238 : myele_t const * mymap_query_ele_const( mymap_query_t const * query );
239 : myele_t * mymap_query_ele ( mymap_query_t * query );
240 :
241 : // mymap_hint hints that the caller plans to do an operation
242 : // involving key soon. Assumes join is a current local join, key
243 : // points to a valid key in the caller's address space for the
244 : // duration of the call and query points to a local scratch to hold
245 : // info about the hint. Retains no interest in key. On return,
246 : // the query memo will be initialized.
247 : //
248 : // flags is a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_USE_HINT
249 : // is set, this will assume that query's memo is already
250 : // initialized for key (e.g. mostly useful for hashless
251 : // prefetching). If FD_MAP_FLAG_PREFETCH_META /
252 : // FD_MAP_FLAG_PREFETCH_DATA is set, this will issue a prefetch for
253 : // key's mymap metadata (i.e. lock version) / the element at the
254 : // start of key's chain if any (i.e. typically the key of interest
255 : // in a well managed map). FD_MAP_FLAG_PREFETCH combines both for
256 : // convenience. This can be used to overlap key access latency
257 : // with unrelated operations. All other flags are ignored.
258 :
259 : void
260 : mymap_hint( MAP_(t) const * join
261 : MAP_KEY_T const * key
262 : MAP_(query_t) * query,
263 : int flags );
264 :
265 : // mymap_insert inserts into a mymap a mapping from a key to an
266 : // element store element. ele points in the caller's address space
267 : // to the element and ele->key is initialized to the key. flags is
268 : // a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set /
269 : // clear in flags, this is allowed / not allowed to block the
270 : // caller. Assumes join is a current local join, element is not in
271 : // the mymap and the key is not currently mapped to anything in the
272 : // mymap. This is a non-blocking fast O(1) and supports highly
273 : // concurrent operation.
274 : //
275 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
276 : // (negative) on failure. On success, ele was inserted into the
277 : // mymap at some point during the call (mymap took ownership of the
278 : // element at that time but the application is free to manage all
279 : // fields of the element except ele->key, ele->next and, if the
280 : // mymap is memoized, ele->hash ... insert will initialize the hash
281 : // field to mymap_key_hash( key, seed ) and this will be stable
282 : // while ele is in mymap). On failure, the mymap was not modified
283 : // by the call, no changes of ownership occurred in the call and
284 : // returns:
285 : //
286 : // - FD_MAP_ERR_INVAL: ele is not a pointer to an element store
287 : // element.
288 : //
289 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
290 : // progress at some point during the call. Try again later (e.g.
291 : // after a random exponential backoff). Specifically, this
292 : // operation requires locking the map chain associated with key.
293 : // Since there are typically O(1) keys per chain, the probability
294 : // of getting AGAIN due to a false conflict is negligible even
295 : // under highly concurrent loads. Since insert / remove are fast
296 : // O(1) operations, any remaining conflicts, real or imagined,
297 : // are typically very short lived. Never returned for a blocking
298 : // call.
299 : //
300 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
301 : // query try/test, txn try/test or iter lock/unlock.
302 :
303 : int mymap_insert( mymap_t * join, myele_t * ele, int flags );
304 :
305 : // mymap_remove removes the mapping (if any) for key from the
306 : // mymap. On return, query will contain information about the
307 : // removed mapping and memo will be initialized for key. sentinel
308 : // gives the query element pointer value (arbitrary) to pass
309 : // through when this did not remove a mapping for any reason.
310 : // flags is a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING
311 : // is set / clear in flags, this is allowed / not allowed to block
312 : // the caller. If FD_MAP_FLAG_USE_HINT is set, this assumes
313 : // query's memo is already initialized for key. Assumes join is a
314 : // current local join and key is valid for the duration of the
315 : // call. Retains no interest in key, sentinel or query. This is a
316 : // non-blocking fast O(1) and supports highly concurrent operation.
317 : //
318 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
319 : // (negative) on failure. On success, key's mapping was removed at
320 : // some point during the call. mymap_query_ele( query ) will point
321 : // in the caller's address space to the element store element where
322 : // key mapped just before it was removed (element ownership
323 : // transferred to the caller at that time). On failure, no changes
324 : // were made by this call, mymap_query_ele( query ) will be
325 : // sentinel and:
326 : //
327 : // - FD_MAP_ERR_KEY: Key was not found in the mymap at some point
328 : // during the call.
329 : //
330 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
331 : // progress at some point during the call. Same considerations
332 : // as insert above. Never returned for a blocking call.
333 : //
334 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
335 : // point during the call.
336 : //
337 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
338 : // query try/test, txn try/test or iter lock/unlock.
339 :
340 : int
341 : mymap_remove( mymap_t * join,
342 : ulong const * key,
343 : myele_t const * sentinel,
344 : mymap_query_t * query,
345 : int flags );
346 :
347 : // mymap_modify_try tries to start modification of the mymap
348 : // element corresponding to key. On return, query will hold
349 : // information about the try and memo will be initialized for key.
350 : // sentinel gives the query element pointer value (arbitrary) to
351 : // pass through when it is not safe to try. flags is a bit-or of
352 : // FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set / clear, this
353 : // call is allowed / not allowed to block the caller. If
354 : // FD_MAP_FLAG_USE_HINT is set, this assumes query's memo is
355 : // already initialized for key. If FD_MAP_FLAG_ADAPTIVE is set /
356 : // clear, this call should / should not adapt the mymap to
357 : // accelerate future operations on this key. Adaptation for a key
358 : // can potentially slow future operations for other keys. The
359 : // marginal benefit of adaptation for a key grows linearly with the
360 : // number of keys managed by the key's chain. Assumes join is a
361 : // current local join and key is valid for the duration of the
362 : // call. Retains no interest in key, sentinel or query. This is a
363 : // non-blocking fast O(1) and supports highly concurrent operation.
364 : //
365 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
366 : // (negative) on failure. On success, mymap_query_ele( query )
367 : // will point in the caller's address space to the element to
368 : // modify and the chain that manages key will be locked. The mymap
369 : // retains ownership of this element and management of the key and
370 : // next fields. The caller is free to modify any other fields. On
371 : // failure, the mymap was not modified by this call,
372 : // mymap_query_ele( query ) will be sentinel and returns:
373 : //
374 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
375 : // during the call.
376 : //
377 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
378 : // progress at some point during the try. Same considerations as
379 : // insert above. Never returned for a blocking call.
380 : //
381 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
382 : // point during the call.
383 : //
384 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
385 : // try/test, txn try/test or iter_lock/unlock on the same thread.
386 :
387 : int
388 : mymap_modify_try( mymap_t * join,
389 : ulong const * key,
390 : myele_t * sentinel,
391 : mymap_query_t * query,
392 : int flags );
393 :
394 : // mymap_modify_test finishes an in-progress modification. Assumes
395 : // query is valid and the caller is in a modify try. Returns
396 : // FD_MAP_SUCCESS (0). On return, the caller will no longer be in
397 : // a modify try. Guaranteed to succeed.
398 : //
399 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
400 : // try/test, txn try/test or iter_lock/unlock on the same thread.
401 :
402 : int mymap_modify_test( mymap_query_t * query );
403 :
404 : // mymap_query_try tries to speculatively query a mymap for key.
405 : // flags is a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_USE_HINT
406 : // is set, this assumes query's memo is already initialized for
407 : // key. On return, query will hold information about the try and
408 : // memo will be initialized for key. sentinel gives the query
409 : // element pointer value (arbitrary) to pass through when it is not
410 : // safe to try the query. Assumes join is a current local join and
411 : // key is valid for the duration of the call. Does not modify the
412 : // mymap and retains no interest in key, sentinel or query. This
413 : // is a non-blocking fast O(1) and supports highly concurrent
414 : // operation.
415 : //
416 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
417 : // (negative) on failure. On success, key mapped to an element in
418 : // the element store at some point during the call.
419 : // mymap_query_ele( query ) will point in the caller's address
420 : // space to the element store element where key mapped at that
421 : // time. The mymap retains ownership of this element but the
422 : // caller can zero copy speculatively process the element. On
423 : // failure, mymap_query_ele( query ) will be sentinel and returns:
424 : //
425 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
426 : // during the call.
427 : //
428 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
429 : // progress at some point during the call. Try again later (e.g.
430 : // after a random exponential backoff). Unlike insert and
431 : // remove, this call does _not_ require a lock on the chain
432 : // associated with key. As such, AGAIN can only be caused by
433 : // concurrent operations that require a lock on the chain that
434 : // manages key (with similar considerations as insert and remove)
435 : // and this will never interfere with any other concurrent
436 : // operation. Among the many implications, a query will never
437 : // delay a concurrent query and AGAIN will never be returned if
438 : // only concurrent queries are in progress.
439 : //
440 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
441 : // point during the call.
442 : //
443 : // IMPORTANT SAFETY TIP! THE CALLER SHOULD BE PREPARED TO HANDLE
444 : // ARBITRARY AND/OR INCONSISTENT VALUES FOR ELEMENT FIELDS DURING
445 : // SPECULATIVE PROCESSING. CALLERS SHOULD NOT COMMIT ANY RESULTS
446 : // OF SPECULATIVE PROCESSING UNTIL IT TESTS THE QUERY WAS
447 : // SUCCESSFUL.
448 : //
449 : // The simplest form of speculative processing is to copy the
450 : // element from the element store into a local temporary, test that
451 : // the speculation was valid, and then process the local temporary
452 : // copy at its leisure. Zero copy, more selective copying and/or
453 : // writing speculative results into local tempoaries are more
454 : // advanced examples of speculative processing.
455 : //
456 : // Use mymap_modify to do a blocking (non-speculative) and/or
457 : // adaptive query (just don't actually modify the element).
458 : //
459 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
460 : // try/test, txn try/test or iter_lock/unlock on the same thread.
461 :
462 : int
463 : mymap_query_try( mymap_t const * join,
464 : ulong const * key,
465 : myele_t const * sentinel,
466 : mymap_query_t * query,
467 : int flags );
468 :
469 : // mymap_query_test tests if an in-progress query is still valid.
470 : // Assumes query is valid, we are still in a query try and chain
471 : // version numbers have not wrapped since we started the try.
472 : // Returns FD_MAP_SUCCESS (0) if the query is still valid and
473 : // FD_MAP_ERR_AGAIN (negative) if a potentially conflicting
474 : // operation was in progress at some point during the try.
475 : //
476 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
477 : // try/test, txn try/test or iter_lock/unlock on the same thread.
478 :
479 : int mymap_query_test( mymap_query_t const * query );
480 :
481 : // mymap_txn_key_max_max() returns the theoretical maximum number
482 : // of keys that can be in a transaction. (Practically unbounded.)
483 :
484 : ulong mymap_txn_key_max_max( void );
485 :
486 : // mymap_txn_{align,footprint} return the alignment and footprint
487 : // required for a mymap_txn_t that can support at least key_max
488 : // keys. align will be an integer power of two. footprint will be
489 : // a multiple of align. Returns 0 if key_max > key_max_max.
490 : //
491 : // mymap_txn_init formats a memory region with the required
492 : // alignment and footprint as a txn_t that can support at least
493 : // key_max keys. ltxn points in the caller's address space to the
494 : // memory region to use. Assumes join is a current local join.
495 : // On success, returns ltxn (txn will have ownership of the memory
496 : // region, txn will be valid with empty speculative and locked key
497 : // sets). The lifetime of the join should be at least the lifetime
498 : // of the txn. On failure (obviously bad inputs), returns NULL (no
499 : // changes).
500 : //
501 : // mymap_txn_fini unformats a memory region as a txn_t and returns
502 : // a pointer to the underlying memory. On success, returns ltxn.
503 : // The caller has ownership of the memory region on return. On
504 : // failure (e.g. NULL input), returns NULL (no changes).
505 :
506 : ulong mymap_txn_align ( void );
507 : ulong mymap_txn_footprint( ulong key_max );
508 : mymap_txn_t * mymap_txn_init ( void * ltxn, mymap_t * join, ulong key_max );
509 : void * mymap_txn_fini ( mymap_txn_t * txn );
510 :
511 : // mymap_txn_add indicates that key may be used in a txn. Assumes
512 : // txn is valid and not in a try and key is valid for duration of
513 : // the call. Retains no interest in key. A zero value for lock
514 : // indicates the key will be operated on speculatively. A non-zero
515 : // value indicates the key will potentially be inserted / removed /
516 : // modified by the transaction. It is okay to have a mixture of
517 : // speculative and locked keys in a transaction. Further, it is
518 : // okay to add the same key multiple times (though not particularly
519 : // efficient), including as a mixture of speculative and locked (if
520 : // _any_ adds of same key are locked, it will be treated as a
521 : // locked key for the txn overall). Returns FD_MAP_SUCCESS (zero)
522 : // on success (txn is valid and not in a try) and FD_MAP_ERR_INVAL
523 : // (negative) on failure (txn is valid and not in a try but key was
524 : // not added). INVAL is only possible when more than key_max adds
525 : // have been done since init.
526 :
527 : int mymap_txn_add( mymap_txn_t * txn, mymap_key_t const * key, int lock );
528 :
529 : // mymap_txn_try returns FD_MAP_SUCCESS (zero) if it is safe to try
530 : // the transaction and FD_MAP_ERR_AGAIN (negative) if the user
531 : // should try again later (e.g. after a random exponential
532 : // backoff). flags is a bit-of of FD_MAP_FLAG flags. If
533 : // FD_MAP_FLAG_BLOCKING is set / clear, this call is allowed / not
534 : // allowed to block the caller. The upper 30-bits of flags can be
535 : // used to provide a seed for random backoffs (but this is up to
536 : // the application and rarely matters in practice). Assumes txn is
537 : // valid and not in a try. On success, txn will be valid and in a
538 : // try. On a failure, txn will be valid and not in a try.
539 : //
540 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
541 : // try/test, query try/test or iter_lock/unlock on the same thread.
542 : //
543 : // To better under the restrictions on nesting and interleaving,
544 : // mymap_{insert,remove,query_try,modify_try,query_try} will fail
545 : // with FD_MAP_ERR_AGAIN for any key managed by a chain locked by
546 : // the txn but can succeed for keys on managed by other chains.
547 : // This behavior is unpredictable as it depends on the keys in the
548 : // txn, keys not in the transaction, map seed, map chain count and
549 : // user provided key hash function. Interleaving a query_test,
550 : // modify_test, iter_unlock can be similarly unpredictable. Worse,
551 : // an interleaved modify_test or iter_unlock can muck up the chain
552 : // locks and used by the txn try. Similarly for other cases.
553 : //
554 : // IMPORTANT SAFETY TIP! If some txn keys were speculative, the
555 : // caller should not rely on any reads from the corresponding
556 : // element until the transaction tests successfully. Similar
557 : // considerations as mymap_query_try.
558 :
559 : int mymap_txn_try( mymap_txn_t * txn, int flags );
560 :
561 : // mymap_txn_hint behaves _identially_ to mymap_hint but is allowed
562 : // to assume we are in a txn try and key was added to the txn as
563 : // either speculative or locked.
564 : //
565 : // mymap_txn_{insert,remove} behave _identically_ to
566 : // mymap_{insert,remove} from the caller's point of view but
567 : // assumes we are in a txn try and key was added to the txn as
568 : // locked. These will never return FD_MAP_ERR_AGAIN.
569 : //
570 : // Similarly, mymap_txn_query behaves _identically_ to
571 : // mymap_query_try from the caller's point of view but assumes we
572 : // are in a txn try and key was added to txn as either speculative
573 : // or locked. Will never return FD_MAP_ERR_AGAIN.
574 : //
575 : // Likewise, mymap_txn_modify behaves _identically_ to
576 : // mymap_modify_try from the caller's point of view but assumes we
577 : // are in a txn try and key was added to txn as locked. It will
578 : // never return FD_MAP_ERR_AGAIN.
579 : //
580 : // There is no mymap_query_test or mymap_modify_test because these
581 : // are part of the overall txn test.
582 : //
583 : // IMPORTANT SAFETY TIP!
584 : //
585 : // These never should be used outside a txn try.
586 : //
587 : // IMPORTANT SAFETY TIP!
588 : //
589 : // For a speculative txn key, mymap_query can return FD_MAP_ERR_KEY
590 : // and/or FD_MAP_ERR_CORRUPT if there are locked concurrent
591 : // operations on the chain managing key (e.g a concurrent remove of
592 : // a key that happens to be on the same chain). When such
593 : // operations are possible, these errors can be canaries that the
594 : // transaction has already failed (testing the txn is still
595 : // necessary to it "official"). CORRUPT in this situation is most
596 : // likely an "unofficial" failure than memory corruption.
597 : // Similarly, while mymap_txn_query is guaranteed give a pointer to
598 : // an element store element on success, there is no guarantee it
599 : // will be to the correct element, a well formed element (or even
600 : // to the same location if used multiple times in the try). When
601 : // such concurrent operations are not possible (e.g. single
602 : // threaded operation), SUCCESS, KEY, CORRUPT and the element
603 : // pointer returned have their usual interpretations.
604 : //
605 : // TL;DR speculative txn keys are best used for common stable
606 : // constant-ish read-only data to minimize concurrent complex
607 : // transactions using these common keys from unnecessarily blocking
608 : // each other.
609 : //
610 : // TL;DR resolve all speculative txn keys to elements at
611 : // transaction start exactly once for sanity.
612 : //
613 : // TL;DR avoid using speculative txn keys at all unless very well
614 : // versed in lockfree programming idioms and gotchas.
615 :
616 : int mymap_txn_hint ( mymap_t const * join, ulong const * key, mymap_query_t * query, int flags );
617 : int mymap_txn_insert( mymap_t * join, myele_t * ele );
618 : int mymap_txn_remove( mymap_t * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query, int flags );
619 : int mymap_txn_modify( mymap_t * join, ulong const * key, myele_t * sentinel, mymap_query_t * query, int flags );
620 : int mymap_txn_query ( mymap_t const * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query, int flags );
621 :
622 : // mymap_txn_test returns FD_MAP_SUCCESS (zero) if the txn try
623 : // succeeded and FD_MAP_ERR_AGAIN (negative) if it failed (e.g. the
624 : // test detected a potentially conflicting concurrent operation
625 : // during the try). On success, any results from processing of
626 : // keys marked as speculative can be trusted. On failure, the
627 : // mymap was not changed by the try. Regardless of return value,
628 : // txn will _not_ be in a try on return _but_ will still be valid.
629 : // As such, if a transaction fails, it can be retried (e.g. after a
630 : // random exponential backoff) without needing to recreate it (e.g.
631 : // no need to fini then init/add again). Assumes txn is in a try
632 : // and, for any txn speculative keys, no wrapping of txn version
633 : // numbers has occurred since the try started.
634 : //
635 : // IMPORTANT SAFETY TIP! This is guaranteed to succeed if no keys
636 : // were added to the transaction as speculative.
637 : //
638 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
639 : // try/test, query try/test or iter_lock/unlock on the same thread.
640 :
641 : int mymap_txn_test( mymap_txn_t * txn );
642 :
643 : // mymap_iter_lock locks zero or more map chains. Assumes join is
644 : // a current local join. On input, lock_seq[i] for i in
645 : // [0,lock_cnt) gives the set of chains to lock. flags is a bit-or
646 : // of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set / not set,
647 : // this call is allowed / not allowed to block the caller. Assumes
648 : // join, lock_seq and lock_cnt are valid and the caller does not
649 : // already have any of these locks. The upper 30-bits of flags
650 : // can be used to provide a seed for random backoffs (but this is
651 : // up to the application and rarely necessary). In particular,
652 : // lock_seq should contain unique values in [0,chain_cnt), which
653 : // also implies lock_cnt is at most chain_cnt. Retains no interest
654 : // in lock_seq. Returns FD_MAP_SUCCESS (zero) on success and
655 : // FD_MAP_ERR_AGAIN (negative) on failure. On return:
656 : //
657 : // FD_MAP_SUCCESS: lock_seq will be a permutation of the input
658 : // giving the actual order (from oldest to newest) in which the
659 : // locks were acquired. This can be used, for example, to unlock
660 : // in the same order and can be used by the caller to optimize
661 : // the order for iterating over keys to reduce the amount of
662 : // contention with other concurrent operations. If there were no
663 : // potentially conflicting concurrent operations during the call,
664 : // lock_seq will be in the input order.
665 : //
666 : // FD_MAP_ERR_AGAIN: a potentially conflicting operation was in
667 : // progress at some point during the call. lock_seq might have
668 : // been changed (but will still be a permutation of the input).
669 : // The mymap itself wasn't changed by the call.
670 : //
671 : // FD_MAP_ERR_INVAL: join, lock_seq and/or lock_cnt were
672 : // obviously invalid. Same considerations as FD_MAP_ERR_AGAIN.
673 : //
674 : // Guaranteed to succeed if blocking (but will not return to the
675 : // caller until all the requested chains are locked).
676 : //
677 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
678 : // try/test, query try/test or txn try/test on the same thread.
679 :
680 : int
681 : mymap_iter_lock( mymap_t * join,
682 : ulong * lock_seq,
683 : ulong lock_cnt,
684 : int flags );
685 :
686 : // mymap_iter_unlock unlocks chains lock_seq[i] for i in
687 : // [0,lock_cnt) in that order. Assumes join is a current local
688 : // join, lock_seq and lock_cnt are valid (same requirements as
689 : // mymap_iter_lock) and the caller has a lock on those chains.
690 : // Retains no interest in lock_seq. Guaranteed to succeed.
691 : //
692 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
693 : // try/test, query try/test or txn try/test on the same thread.
694 :
695 : void
696 : mymap_iter_unlock( mymap_t * join,
697 : ulong const * lock_seq,
698 : ulong lock_cnt );
699 :
700 : // mymap_iter_chain_idx returns the index of the map chain that
701 : // manages key. Useful for iterating over groups of related keys
702 : // when the map hash function is designed to group all related keys
703 : // onto the same chain.
704 :
705 : ulong
706 : mymap_iter_chain_idx( mymap_t const * join,
707 : ulong const * key );
708 :
709 : // mymap_{iter,iter_done,iter_next,iter_ele,iter_ele_const} iterate
710 : // over a single map chain. Assumes join is a current local join,
711 : // chain_idx is in [0,mymap_chain_cnt(join)) and the caller lock on
712 : // chain idx or the chain is otherwise known to be idle.
713 : //
714 : // These are building blocks for concurrent parallel iteration. As
715 : // the locking and ordering requirements for such an iterator are
716 : // very application specific, no default global iterators are
717 : // provided (i.e. a generic global iterator will need to be so
718 : // conservative on locking than typical application requirements,
719 : // it is practically more mischievious than useful). E.g. a mymap
720 : // snapshot might lock all chains to get the state of the entire
721 : // mymap at a consistent point in time. For each chain (in the
722 : // order given by the lock acquisition), the snapshot would
723 : // serialize all keys on that chain and then unlock it
724 : // incrementally.
725 :
726 : mymap_iter_t mymap_iter ( mymap_t const * join, ulong chain_idx );
727 : mymap_iter_t mymap_iter_done ( mymap_iter_t iter );
728 : mymap_iter_t mymap_iter_next ( mymap_iter_t iter );
729 : myele_t const * mymap_iter_ele_const( mymap_iter_t iter );
730 : myele_t * mymap_iter_ele ( mymap_iter_t iter );
731 :
732 : // mymap_reset removes all elements from the mymap. Caller has
733 : // ownership of all items removed on return. Assumes that join is
734 : // a current local join and the caller has a lock on all map chains
735 : // or the map is otherwise known to be idle.
736 :
737 : void mymap_reset( mymap_t * join );
738 :
739 : // mymap_verify returns FD_MAP_SUCCESS (0) if the join, underlying
740 : // map and underlying element store give a valid mapping of unique
741 : // keys to unique elements in the element store. Assumes that
742 : // caller has a lock on all map chains or the map is otherwise
743 : // known to be idle. Returns FD_MAP_ERR_INVAL (negative) otherwise
744 : // (no changes by this call, logs details).
745 :
746 : int mymap_verify( mymap_t const * join );
747 :
748 : // mymap_strerror converts an FD_MAP_SUCCESS / FD_MAP_ERR code into
749 : // a human readable cstr. The lifetime of the returned pointer is
750 : // infinite. The returned pointer is always to a non-NULL cstr.
751 :
752 : char const * mymap_strerror( int err );
753 :
754 : Do this as often as desired in a compilation unit to get different
755 : types of concurrent maps. Options exist for generating library
756 : header prototypes and/or library implementations for concurrent maps
757 : usable across multiple compilation units. Additional options exist
758 : to use index compression, different hashing functions, key comparison
759 : functions, etc as detailed below.
760 :
761 : To better understand the insert/remove/{modify,query}_{try,test}
762 : APIs:
763 :
764 : ... basic insert
765 :
766 : myele_t * ele = ... acquire an unused element from the element store
767 :
768 : ... populate ele appropriately, including
769 :
770 : ele->key = ... key associated with this element
771 :
772 : int err = mymap_insert( join, ele, FD_MAP_FLAG_BLOCKING );
773 :
774 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
775 :
776 : ... If err is FD_MAP_ERR_INVAL, ele did not point at an element
777 : ... store element.
778 : ...
779 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
780 : ... conflicting operation in progress on the mymap during the
781 : ... call. We can try again later (e.g. after a random backoff or
782 : ... doing other non-conflicting work).
783 :
784 : } else {
785 :
786 : ... At this point, a mapping from key to the element store
787 : ... element pointed to by ele in our address space was added
788 : ... during the call. ele->key will be stable while in the mymap.
789 : ... Neither ele->key nor ele->next should be modified by the
790 : ... application while in the mymap. The application is free to
791 : ... manage all other fields of the element as desired.
792 :
793 : }
794 :
795 : ... basic remove
796 :
797 : ulong key = ... key to remove
798 :
799 : mymap_query_t query[1];
800 : int err = mymap_remove( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
801 : mymap_ele_t * ele = mymap_query_ele( query );
802 :
803 : if( FD_UNLIKELY( err ) ) {
804 :
805 : ... At this point, ele==sentinel==NULL.
806 : ...
807 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
808 : ... point during the remove.
809 : ...
810 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
811 : ... conflicting operation in progress during the remove. We can
812 : ... try again later (e.g. after a random backoff or doing other
813 : ... non-conflicting work). (Not possible in this example.)
814 : ...
815 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
816 : ... at some point during the call. (Usually abortive.)
817 :
818 : } else {
819 :
820 : ... At this point, ele points into the element store (non-NULL),
821 : ... ele->key matches key, key mapped to that element before the
822 : ... remove, and we have ownership of that element.
823 :
824 : ... release ele to the element store
825 :
826 : }
827 :
828 : ... basic modify
829 :
830 : ulong key = ... key to modify
831 :
832 : mymap_query_t query[1];
833 : int err = mymap_modify_try( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
834 : mymap_ele_t * ele = mymap_query_ele( query );
835 :
836 : if( FD_UNLIKELY( err ) ) {
837 :
838 : ... At this point, ele==sentinel==NULL.
839 : ...
840 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
841 : ... point during the try.
842 : ...
843 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
844 : ... conflicting operation in progress during the try. We can try
845 : ... again later (e.g. after a random backoff or doing other
846 : ... non-conflicting work). (Not possible in this example.)
847 : ...
848 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
849 : ... at some point during the call. (Usually abortive.)
850 :
851 : } else {
852 :
853 : ... At this point, ele points in our address space to an element
854 : ... store element, ele->key matches key and we are in a modify try
855 : ... such that it is safe to modify fields ele not managed by the
856 : ... mymap.
857 :
858 : ... Modify application managed fields of ele here.
859 :
860 : ... IMPORTANT SAFETY TIP! IF THE USER WANTS TO SUPPORT ROLLING
861 : ... BACK A MODIFICATION AT THIS POINT, THEY CAN DO SO BY SAVING
862 : ... THE ORIGINAL VALUE OF ELE BEFORE MODIFYING ANY FIELDS AND
863 : ... THEN RESTORING IT HERE.
864 :
865 : ... Finish the modification (guaranteed to succeed)
866 :
867 : mymap_modify_test( query );
868 :
869 : ... At this point, the modification is done and we are no
870 : ... longer in a try.
871 :
872 : }
873 :
874 : ... basic speculative query
875 :
876 : ulong key = ... key to query
877 :
878 : mymap_query_t query[1];
879 : int err = mymap_query_try( join, &key, NULL, query, 0 );
880 : mymap_ele_t const * ele = mymap_query_ele_const( query );
881 :
882 : if( FD_UNLIKELY( err ) ) {
883 :
884 : ... At this point, ele==sentinel==NULL.
885 : ...
886 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
887 : ... point during the try.
888 : ...
889 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
890 : ... conflicting operation in progress during the try and we can
891 : ... try again later (e.g. after a random backoff or doing other
892 : ... non-conflicting work).
893 : ...
894 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
895 : ... during the call. (Usually abortive.)
896 :
897 : } else {
898 :
899 : ... At this point, ele points in our address space to an element
900 : ... in the element store (non-NULL) and ele->key matched key at
901 : ... some point during the try.
902 :
903 : ... Speculatively process ele here.
904 : ...
905 : ... DO NOT TRUST ANY RESULTS OF THIS SPECULATIVE PROCESSING YET.
906 : ... THERE IS NO GUARANTEE YET THAT A CONCURRENT USER HASN'T
907 : ... CHANGED THE MYMAP IN A WAY THAT COULD YIELD ARBITRARY AND
908 : ... INCONSISTENT RESULTS.
909 : ...
910 : ... The simplest and most common form of speculative processing
911 : ... is to copy the needed portions of ele into a local stack
912 : ... temp.
913 : ...
914 : ... Note: concurrent operations could include removing key from
915 : ... the mymap (and maybe multiple cycles of inserting and
916 : ... removing it and then at potentially different element store
917 : ... locations). That's not an issue practically as the ele
918 : ... pointer here will be to an element compatible memory region
919 : ... that will continue to exist regardless and we shouldn't be
920 : ... trusting any query reads yet (the query test will detect if
921 : ... if these can be trusted).
922 : ...
923 : ... Rant: If ele is more complex than plain-old-data, so long ele
924 : ... is using allocators like fd_alloc and fd_wksp for dynamically
925 : ... allocated fields (e.g. not using the giant steaming pile of
926 : ... page based memory virtual memory, operating system, language
927 : ... and standard library fail that is heap based allocation ala
928 : ... malloc/free), concurrent removes are _still_ fine for the
929 : ... exact same reason. That is, the memory that actually backed
930 : ... dynamically allocated fields will still continue to exist
931 : ... post remove ... you know ... just like reality (turns out,
932 : ... surprise, "free" doesn't actually uninstall any DIMMs and
933 : ... malloc/free are the worst possible abstraction for resource
934 : ... management).
935 : ...
936 : ... The concurrent remove case actually demonstrates why fd_alloc
937 : ... / fd_wksp / fd_shmem / etc exist in the first place. Beyond
938 : ... being faster, simpler, more concurrent and more reliable
939 : ... (especially in cases like this), they are more flexible (e.g.
940 : ... sharing and persisting the data structure asynchronously
941 : ... across multiple processes in different address spaces) and
942 : ... more secure (e.g. can easily bounds check memory accesses
943 : ... and then use the memory subsystem to sandbox different
944 : ... components from touching memory they shouldn't, actually
945 : ... using a modern virtual memory subsystem for something useful
946 : ... for a change instead of bending over backwards to provide
947 : ... exactly the wrong abstraction of the real world). Common
948 : ... hardware and software practices have turned computers into an
949 : ... unreliable and insecure Tower of Babel. Had virtual memory
950 : ... been better abstracted and better implemented all levels of
951 : ... the stack, life would be much easier (and things like fast
952 : ... persistent memories might have seen a lot more commercial
953 : ... success). In the meantime, dispelling the magical thinking
954 : ... encouraged by the conventional abstractions, the key lessons
955 : ... are:
956 : ...
957 : ... * Friends don't let friends malloc.
958 : ... * Lockfree is not a synonym for garbage collection.
959 : ... * Real world computers aren't infinite tape Turing machines.
960 : ... * Real world memory doesn't magically disappear.
961 :
962 : ... At this point, we are done with speculative processing (or we
963 : ... don't want to do any more speculative processing if the try
964 : ... has already failed).
965 :
966 : err = mymap_query_test( query );
967 : if( FD_UNLIKELY( err ) ) {
968 :
969 : ... At this point, err will be FD_MAP_ERR_AGAIN and a
970 : ... potentially conflicting operation in the try was detected
971 : ... by the test.
972 :
973 : ... Application specific handling here (e.g. try again after a
974 : ... random backoff or doing other non-conflicting work).
975 :
976 : } else {
977 :
978 : ... At this point, the results of the speculation thus far can
979 : ... be trusted and can be considered to have been computed at
980 : ... some point in time between try and test.
981 :
982 : }
983 : }
984 :
985 : To better understand the txn API:
986 :
987 : ... allocate a txn
988 :
989 : ulong align = mymap_txn_align();
990 : ulong footprint = mymap_txn_footprint( key_max );
991 : void * ltxn = ... allocate align/footprint local scratch memory
992 : mymap_txn_t * txn = mymap_txn_init( ltxn, join, key_max );
993 :
994 : ... add at most key_max keys to the transaction as locked
995 :
996 : for( ... all keys involved in the transaction ... ) mymap_txn_add( txn, key, 1 ); // guaranteed to succeed for this example
997 :
998 : ... try to do the transaction
999 :
1000 : int err = mymap_txn_try( txn, FD_MAP_FLAG_BLOCKING );
1001 :
1002 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
1003 :
1004 : ... At this point, err is FD_MAP_ERR_AGAIN and there was a
1005 : ... potentially conflicting operation in progress during the try.
1006 : ... We can try again later (e.g. after a random backoff or doing
1007 : ... other non-conflicting work). We are no longer in a try but
1008 : ... we could reuse the txn as-is to retry.
1009 :
1010 : } else {
1011 :
1012 : ... At this point, it is safe to try the transaction.
1013 :
1014 : ... Do the transaction here. Since all keys are locked in this
1015 : ... example, we don't need to worry about any changing behind our
1016 : ... back (i.e. the try is guaranteed to succeed).
1017 :
1018 : ... Like modify, if we wants to rollback the transaction at this
1019 : ... point, we should save the state of all locked keys involved
1020 : ... to local temporaries before we do the transaction and then
1021 : ... restore the state here.
1022 :
1023 : ... Finish the try (guaranteed to succeed for this example)
1024 :
1025 : mymap_txn_test( txn );
1026 :
1027 : ... At this point, we are no longer in a txn try but the txn is
1028 : ... valid such that we could reuse the txn as-is for another
1029 : ... transaction involving the same keys.
1030 :
1031 : mymap_txn_fini( txn );
1032 :
1033 : ... At this point, txn is no longer valid and we have ownership of
1034 : ... the ltxn memory region
1035 :
1036 : ... free ltxn
1037 :
1038 : }
1039 :
1040 : To better understand the iter API:
1041 :
1042 : ... basic mymap element snapshot (i.e. iterate over all elements in
1043 : ... the mymap at a globally consistent point in time while
1044 : ... minimizing contention with other concurrent operations)
1045 :
1046 : ulong lock_cnt = mymap_chain_cnt( join );
1047 :
1048 : ulong * lock_seq = ... allocate lock_cnt ulong scratch ...
1049 :
1050 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_seq[ lock_idx ] = lock_idx;
1051 :
1052 : mymap_iter_lock( join, lock_seq, lock_cnt, FD_MAP_FLAG_BLOCKING );
1053 :
1054 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
1055 : ulong chain_idx = lock_seq[ lock_idx ]; // process chains in the order they were locked
1056 :
1057 : for( mymap_iter_t iter = mymap_iter( join, chain_idx ); !mymap_iter_done( iter ); iter = mymap_iter_next( iter ) ) {
1058 : myele_t const * ele = mymap_iter_ele_const( iter );
1059 :
1060 : ... append ele to snapshot here (ele will be appended in
1061 : ... no particular order for this example). Note that, as
1062 : ... the caller has a lock on the chain that manages ele,
1063 : ... the caller is free to modify the fields of ele it
1064 : ... manages.
1065 :
1066 : }
1067 :
1068 : mymap_iter_unlock( join, lock_seq + lock_idx, 1UL ); // unlock incrementally
1069 : }
1070 :
1071 : ... free lock_seq here
1072 : */
1073 :
1074 : #include "fd_map.h"
1075 :
1076 : /* FIXME: consider adding a parallel verify that operates on a
1077 : locked/idle subset of the chains. */
1078 :
1079 : /* MAP_NAME gives the API prefix to use for map */
1080 :
1081 : #ifndef MAP_NAME
1082 : #error "Define MAP_NAME"
1083 : #endif
1084 :
1085 : /* MAP_ELE_T is the map element type */
1086 :
1087 : #ifndef MAP_ELE_T
1088 : #error "Define MAP_ELE_T"
1089 : #endif
1090 :
1091 : /* MAP_KEY_T is the map key type */
1092 :
1093 : #ifndef MAP_KEY_T
1094 : #define MAP_KEY_T ulong
1095 : #endif
1096 :
1097 : /* MAP_KEY is the MAP_ELE_T key field */
1098 :
1099 : #ifndef MAP_KEY
1100 : #define MAP_KEY key
1101 : #endif
1102 :
1103 : /* MAP_IDX_T is the map next index type. Should be a primitive unsigned
1104 : integer type large enough to represent the largest capacity element
1105 : store of interest. (E.g. if ushort, the maximum element store
1106 : capacity compatible with the map will be 65535 elements.) */
1107 :
1108 : #ifndef MAP_IDX_T
1109 138944 : #define MAP_IDX_T ulong
1110 : #endif
1111 :
1112 : /* MAP_NEXT is the MAP_ELE_T next field */
1113 :
1114 : #ifndef MAP_NEXT
1115 : #define MAP_NEXT next
1116 : #endif
1117 :
1118 : /* MAP_KEY_EQ returns 0/1 if *k0 is the same/different as *k1 */
1119 :
1120 : #ifndef MAP_KEY_EQ
1121 0 : #define MAP_KEY_EQ(k0,k1) ((*(k0))==(*(k1)))
1122 : #endif
1123 :
1124 : /* MAP_KEY_HASH returns a random mapping of *key into ulong. The
1125 : mapping is parameterized by the 64-bit ulong seed. */
1126 :
1127 : #ifndef MAP_KEY_HASH
1128 : #define MAP_KEY_HASH(key,seed) fd_ulong_hash( (*(key)) ^ (seed) )
1129 : #endif
1130 :
1131 : /* If MAP_MEMOIZE is defined to non-zero, elements have a field that
1132 : can be used while in the map to hold the MAP_KEY_HASH for an
1133 : element's key. This is useful for accelerating user code that might
1134 : need a hash and various map operations. */
1135 :
1136 : #ifndef MAP_MEMOIZE
1137 : #define MAP_MEMOIZE 0
1138 : #endif
1139 :
1140 : /* If MAP_MEMOIZE is non-zero, MAP_MEMO is the memo element field.
1141 : Should be a ulong. Like MAP_KEY and MAP_NEXT, when an element is in
1142 : the map, this value is managed by the map and will contain the
1143 : MAP_KEY_HASH of the element's key and the map's seed. */
1144 :
1145 : #ifndef MAP_MEMO
1146 : #define MAP_MEMO memo
1147 : #endif
1148 :
1149 : /* If MAP_MEMOIZE is defined to non-zero, a non-zero MAP_KEY_EQ_IS_SLOW
1150 : indicates the MAP_MEMO field should be used to accelerate MAP_KEY_EQ
1151 : operations. This is useful when MAP_KEY_EQ is non-trivial (e.g.
1152 : variable length string compare, large buffer compares, etc). */
1153 :
1154 : #ifndef MAP_KEY_EQ_IS_SLOW
1155 : #define MAP_KEY_EQ_IS_SLOW 0
1156 : #endif
1157 :
1158 : /* MAP_CNT_WIDTH gives the number of bits in a ulong to reserve for
1159 : encoding the count in a versioned count. Element store capacity
1160 : should be representable in this width. Default is 43 bits (e.g.
1161 : enough to support a ~1 PiB element store of 128 byte elements). The
1162 : versioning width will be 64-MAP_CNT_WIDTH. Since the least
1163 : significant bit of the version is used to indicate locked, versioning
1164 : width should be at least 2 and ideally as large as possible. With
1165 : the 43 default, a chain's version number will not be reused until
1166 : 2^20 individual operations on a chain have been done. Version
1167 : numbers only impact speculative operations. If not using speculative
1168 : operations, version width can be reduced to the minimum. */
1169 :
1170 : #ifndef MAP_CNT_WIDTH
1171 1158877 : #define MAP_CNT_WIDTH (43)
1172 : #endif
1173 :
1174 : /* If MAP_CUSTOM_CHAIN is defined to non-zero, does not generate the
1175 : chain header struct definition. */
1176 :
1177 : #ifndef MAP_CUSTOM_CHAIN
1178 : #define MAP_CUSTOM_CHAIN 0
1179 : #endif
1180 :
1181 : /* MAP_ALIGN gives the alignment required for the map shared memory.
1182 : Default is 128 for double cache line alignment. Should be at least
1183 : ulong alignment. */
1184 :
1185 : #ifndef MAP_ALIGN
1186 : #define MAP_ALIGN (128UL)
1187 : #endif
1188 :
1189 : /* MAP_MAGIC is the shared memory magic number to aid in persistent
1190 : and/or interprocess usage. */
1191 :
1192 : #ifndef MAP_MAGIC
1193 0 : #define MAP_MAGIC (0xf17eda2c37a7a900UL) /* firedancer map_para version 0 */
1194 : #endif
1195 :
1196 : /* MAP_IMPL_STYLE controls what to generate:
1197 : 0 - header only library
1198 : 1 - library header declaration
1199 : 2 - library implementation */
1200 :
1201 : #ifndef MAP_IMPL_STYLE
1202 : #define MAP_IMPL_STYLE 0
1203 : #endif
1204 :
1205 : /* If MAP_PEDANTIC is defined to non-zero, aborts with FD_LOG_CRIT
1206 : instead of gracefully returning if corruption is found. */
1207 :
1208 : #ifndef MAP_PEDANTIC
1209 : #define MAP_PEDANTIC 0
1210 : #endif
1211 :
1212 : /* Implementation *****************************************************/
1213 :
1214 1749602 : #define MAP_VER_WIDTH (64-MAP_CNT_WIDTH)
1215 :
1216 : #if MAP_IMPL_STYLE==0 /* local use only */
1217 : #define MAP_STATIC FD_FN_UNUSED static
1218 : #else /* library header and/or implementation */
1219 : #define MAP_STATIC
1220 : #endif
1221 :
1222 157572670 : #define MAP_(n) FD_EXPAND_THEN_CONCAT3(MAP_NAME,_,n)
1223 :
1224 : #if MAP_IMPL_STYLE!=2 /* need header */
1225 :
1226 : #include "../bits/fd_bits.h"
1227 :
1228 : /* Note: we don't overalign chain metadata to reduce on map metadata
1229 : footprint requirements. Though this can cause cache false sharing
1230 : for concurrent operations on different keys that are managed
1231 : different chains that share a cache line, this risk can be controlled
1232 : by overprovisioning chain_cnt. That is, for a fixed map metadata
1233 : footprint, this false sharing seems preferable to using fewer chains
1234 : as that would lead to an equivalent increase in the amount of locking
1235 : necessary to avoid potential conflicts for keys managed by the same
1236 : chain (i.e. the former makes good use of the padding that would be
1237 : otherwise wasted if overaligning this). */
1238 :
1239 : #if !MAP_CUSTOM_CHAIN
1240 : struct MAP_(shmem_private_chain) {
1241 : ulong ver_cnt; /* versioned count, cnt is in [0,ele_max] in lsb, ver in msb, odd: chain locked, even: chain unlocked */
1242 : MAP_IDX_T head_cidx; /* compressed index of the first element on the chain */
1243 : };
1244 : #endif /* MAP_CUSTOM_CHAIN */
1245 :
1246 : typedef struct MAP_(shmem_private_chain) MAP_(shmem_private_chain_t);
1247 :
1248 : struct __attribute__((aligned(MAP_ALIGN))) MAP_(shmem_private) {
1249 :
1250 : /* FIXME: consider having a memo of the chain in which an element is
1251 : stored and/or using doubly linked list chains (maybe with the xor
1252 : trick)? We could do faster variants of remove and maybe amortize
1253 : some hash calcs. */
1254 :
1255 : ulong magic; /* == MAP_MAGIC */
1256 : ulong seed; /* Hash seed, arbitrary */
1257 : ulong chain_cnt; /* Number of chains, positive integer power-of-two */
1258 :
1259 : /* Padding to MAP_ALIGN alignment here */
1260 :
1261 : /* MAP_(shmem_private_chain_t) chain[ chain_cnt ] here */
1262 : };
1263 :
1264 : typedef struct MAP_(shmem_private) MAP_(shmem_t);
1265 :
1266 : struct MAP_(private) {
1267 : MAP_(shmem_t) * map; /* Location of the map in the local address space */
1268 : MAP_ELE_T * ele; /* Location of the element store in the local address space */
1269 : ulong ele_max; /* Capacity of the element store, in [0,ele_max_max] */
1270 : };
1271 :
1272 : typedef struct MAP_(private) MAP_(t);
1273 :
1274 : struct MAP_(query_private) {
1275 : ulong memo; /* Query key memo */
1276 : MAP_ELE_T * ele; /* Points to the operation element in the local address space (or a sentinel) */
1277 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages element in the local address space */
1278 : ulong ver_cnt; /* Versioned count of the chain at operation try */
1279 : };
1280 :
1281 : typedef struct MAP_(query_private) MAP_(query_t);
1282 :
1283 : struct MAP_(txn_private_info) {
1284 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages one or more txn keys (set by txn_add) */
1285 : ulong ver_cnt; /* Versioned count of the chain at the transaction start (set by txn_try) */
1286 : };
1287 :
1288 : typedef struct MAP_(txn_private_info) MAP_(txn_private_info_t);
1289 :
1290 : struct MAP_(txn_private) {
1291 : MAP_(shmem_t) * map; /* Map used by this transaction */
1292 : ulong info_max; /* Number of chains possible for this transaction */
1293 : ulong lock_cnt; /* Number of chains in the locked set, in [0,info_max] */
1294 : ulong spec_cnt; /* Number of chains in the speculative set, in [0,info_max], lock_cnt + spec_cnt <= info_max */
1295 :
1296 : /* MAP_(txn_private_info_t) info[ info_max ] here (obligatory sigh
1297 : about lagging C++ support for 0 sized structure array footers).
1298 :
1299 : The locked set is at indices [0,lock_cnt), lock_cnt infos.
1300 : The free set is at indices [lock_cnt,info_max-spec_cnt), free_cnt = info_max-spec_cnt-lock_cnt infos.
1301 : The speculative set is at indices [info_max-spec_cnt,info_max), spec_cnt infos.
1302 :
1303 : A chain will appear at most once in a set. A chain will not appear
1304 : in both sets.
1305 :
1306 : Note that it would be trivial to make this shared memory persistent
1307 : though not obvious if that would be useful. (A precomputed
1308 : template for a common transaction done by multiple threads is a
1309 : possibility but the versions would still need to be local.) */
1310 :
1311 : };
1312 :
1313 : typedef struct MAP_(txn_private) MAP_(txn_t);
1314 :
1315 : struct MAP_(iter_private) {
1316 : MAP_ELE_T const * ele; /* Pointer to the element store in the caller's address space */
1317 : ulong ele_idx; /* Current iteration element store index (or the null index) */
1318 : };
1319 :
1320 : typedef struct MAP_(iter_private) MAP_(iter_t);
1321 :
1322 : FD_PROTOTYPES_BEGIN
1323 :
1324 : /* map_private_vcnt pack ver and cnt into a versioned cnt. ver is
1325 : masked to fit into MAP_VER_WIDTH bits. cnt is assumed in
1326 : [0,ele_max_max].
1327 :
1328 : map_private_vcnt_{ver,cnt} extract the {version,index} from a
1329 : versioned index. Return will fit into {MAP_VER_WIDTH,MAP_CNT_WIDTH}
1330 : bits. */
1331 :
1332 442657 : FD_FN_CONST static inline ulong MAP_(private_vcnt)( ulong ver, ulong cnt ) { return (ver<<MAP_CNT_WIDTH) | cnt; }
1333 :
1334 789638 : FD_FN_CONST static inline ulong MAP_(private_vcnt_ver)( ulong ver_cnt ) { return ver_cnt >> MAP_CNT_WIDTH; }
1335 874705 : FD_FN_CONST static inline ulong MAP_(private_vcnt_cnt)( ulong ver_cnt ) { return (ver_cnt << MAP_VER_WIDTH) >> MAP_VER_WIDTH; }
1336 :
1337 : /* map_shmem_private_chain returns the location in the caller's address
1338 : space of the map chain metadata associated with hash. The chain
1339 : associated with hash 0 is the first chain. Assumes map is valid.
1340 : map_shmem_private_chain_const is a const correct version. */
1341 :
1342 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) *
1343 : MAP_(shmem_private_chain)( MAP_(shmem_t) * map,
1344 432536 : ulong hash ) {
1345 432536 : return (MAP_(shmem_private_chain_t) *)(map+1) + (hash & (map->chain_cnt-1UL));
1346 432536 : }
1347 :
1348 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) const *
1349 : MAP_(shmem_private_chain_const)( MAP_(shmem_t) const * map,
1350 31040911 : ulong hash ) {
1351 31040911 : return (MAP_(shmem_private_chain_t) const *)(map+1) + (hash & (map->chain_cnt-1UL));
1352 31040911 : }
1353 :
1354 : /* map_txn_private_info returns the location in the caller's address
1355 : space of the txn info. Assumes txn is valid. */
1356 :
1357 : FD_FN_CONST static inline MAP_(txn_private_info_t) *
1358 4212 : MAP_(txn_private_info)( MAP_(txn_t) * txn ) {
1359 4212 : return (MAP_(txn_private_info_t) *)(txn+1);
1360 4212 : }
1361 :
1362 : /* map_private_{cidx,idx} compress / decompress 64-bit in-register
1363 : indices to/from their in-memory representations. */
1364 :
1365 227410 : FD_FN_CONST static inline MAP_IDX_T MAP_(private_cidx)( ulong idx ) { return (MAP_IDX_T)idx; }
1366 31060966 : FD_FN_CONST static inline ulong MAP_(private_idx) ( MAP_IDX_T cidx ) { return (ulong) cidx; }
1367 :
1368 : /* map_private_idx_null returns the element storage index that
1369 : represents NULL. */
1370 :
1371 12384 : FD_FN_CONST static inline ulong MAP_(private_idx_null)( void ) { return (ulong)(MAP_IDX_T)(~0UL); }
1372 :
1373 : /* map_private_idx_is_null returns 1 if idx is the NULL map index and 0
1374 : otherwise. */
1375 :
1376 30770071 : FD_FN_CONST static inline int MAP_(private_idx_is_null)( ulong idx ) { return idx==(ulong)(MAP_IDX_T)(~0UL); }
1377 :
1378 : /* map_private_fetch_and_or does a ulong FD_ATOMIC_FETCH_AND_OR when the
1379 : target has FD_HAS_ATOMIC and emulates it when not. When emulated,
1380 : the map will not be safe to use concurrently but will still work. */
1381 :
1382 : static inline ulong
1383 : MAP_(private_fetch_and_or)( ulong volatile * p,
1384 399880 : ulong b ) {
1385 399880 : ulong x;
1386 399880 : FD_COMPILER_MFENCE();
1387 399880 : # if FD_HAS_ATOMIC
1388 399880 : x = FD_ATOMIC_FETCH_AND_OR( p, b );
1389 : # else
1390 : x = *p;
1391 : *p = x | b;
1392 : # endif
1393 399880 : FD_COMPILER_MFENCE();
1394 399880 : return x;
1395 399880 : }
1396 :
1397 192 : FD_FN_CONST static inline ulong MAP_(ele_max_max)( void ) { return (ulong)(MAP_IDX_T)(ULONG_MAX >> MAP_VER_WIDTH); }
1398 :
1399 : FD_FN_CONST static inline ulong
1400 252 : MAP_(chain_max)( void ) {
1401 252 : return fd_ulong_pow2_dn( (ULONG_MAX - sizeof(MAP_(shmem_t)) - alignof(MAP_(shmem_t)) + 1UL) /
1402 252 : sizeof(MAP_(shmem_private_chain_t)) );
1403 252 : }
1404 :
1405 : FD_FN_CONST static inline ulong
1406 108 : MAP_(chain_cnt_est)( ulong ele_max_est ) {
1407 :
1408 : /* Clamp to be in [1,ele_max_max] (as ele_max_est 0 is degenerate and
1409 : as the map is guaranteed to hold at most ele_max_max keys). */
1410 :
1411 108 : ele_max_est = fd_ulong_min( fd_ulong_max( ele_max_est, 1UL ), MAP_(ele_max_max)() );
1412 :
1413 : /* Compute the number of chains as the power of 2 that makes the
1414 : average chain length between ~1 and ~2 when ele_max_est are stored
1415 : in the map and then clamp to the chain max. */
1416 :
1417 108 : ulong chain_min = (ele_max_est>>1) + (ele_max_est&1UL); /* chain_min = ceil(ele_max_est/2), in [1,2^63], computed w/o overflow */
1418 108 : ulong chain_cnt = fd_ulong_pow2_up( chain_min ); /* Power of 2 in [1,2^63] */
1419 :
1420 108 : return fd_ulong_min( chain_cnt, MAP_(chain_max)() );
1421 108 : }
1422 :
1423 396 : FD_FN_CONST static inline ulong MAP_(align)( void ) { return alignof(MAP_(shmem_t)); }
1424 :
1425 : FD_FN_CONST static inline ulong
1426 144 : MAP_(footprint)( ulong chain_cnt ) {
1427 144 : if( !(fd_ulong_is_pow2( chain_cnt ) & (chain_cnt<=MAP_(chain_max)())) ) return 0UL;
1428 : /* Note: assumes shmem_t and shmem_private_chain_t have compatible alignments */
1429 144 : return fd_ulong_align_up( sizeof(MAP_(shmem_t)) + chain_cnt*sizeof(MAP_(shmem_private_chain_t)),
1430 144 : alignof(MAP_(shmem_t)) ); /* no overflow */
1431 144 : }
1432 :
1433 0 : FD_FN_PURE static inline ulong MAP_(seed) ( MAP_(t) const * join ) { return join->map->seed; }
1434 60278 : FD_FN_PURE static inline ulong MAP_(chain_cnt)( MAP_(t) const * join ) { return join->map->chain_cnt; }
1435 :
1436 0 : FD_FN_PURE static inline void const * MAP_(shmap_const)( MAP_(t) const * join ) { return join->map; }
1437 0 : FD_FN_PURE static inline void const * MAP_(shele_const)( MAP_(t) const * join ) { return join->ele; }
1438 0 : FD_FN_PURE static inline ulong MAP_(ele_max) ( MAP_(t) const * join ) { return join->ele_max; }
1439 :
1440 0 : FD_FN_PURE static inline void * MAP_(shmap)( MAP_(t) * join ) { return join->map; }
1441 0 : FD_FN_PURE static inline void * MAP_(shele)( MAP_(t) * join ) { return join->ele; }
1442 :
1443 : FD_FN_PURE static inline int
1444 : MAP_(key_eq)( MAP_KEY_T const * k0,
1445 244508 : MAP_KEY_T const * k1 ) {
1446 244508 : return !!(MAP_KEY_EQ( (k0), (k1) ));
1447 244508 : }
1448 :
1449 : FD_FN_PURE static inline ulong
1450 : MAP_(key_hash)( MAP_KEY_T const * key,
1451 873411 : ulong seed ) {
1452 873411 : return (MAP_KEY_HASH( (key), (seed) ));
1453 873411 : }
1454 :
1455 : static inline void
1456 : MAP_(backoff)( ulong scale,
1457 0 : ulong seed ) {
1458 0 : ulong r = (ulong)(uint)fd_ulong_hash( seed ^ (((ulong)fd_tickcount())<<32) );
1459 0 : for( ulong rem=(scale*r)>>48; rem; rem-- ) FD_SPIN_PAUSE();
1460 0 : }
1461 :
1462 0 : FD_FN_PURE static inline ulong MAP_(query_memo )( MAP_(query_t) const * query ) { return query->memo; }
1463 0 : FD_FN_PURE static inline MAP_ELE_T const * MAP_(query_ele_const)( MAP_(query_t) const * query ) { return query->ele; }
1464 28759 : FD_FN_PURE static inline MAP_ELE_T * MAP_(query_ele )( MAP_(query_t) * query ) { return query->ele; }
1465 :
1466 : static inline int
1467 0 : MAP_(modify_test)( MAP_(query_t) * query ) {
1468 0 : MAP_(shmem_private_chain_t) * chain = query->chain;
1469 0 : ulong ver_cnt = query->ver_cnt;
1470 0 : FD_COMPILER_MFENCE();
1471 0 : chain->ver_cnt = ver_cnt + (2UL<<MAP_CNT_WIDTH);
1472 0 : FD_COMPILER_MFENCE();
1473 0 : return FD_MAP_SUCCESS;
1474 0 : }
1475 :
1476 : static inline int
1477 58932 : MAP_(query_test)( MAP_(query_t) const * query ) {
1478 58932 : MAP_(shmem_private_chain_t) const * chain = query->chain;
1479 58932 : ulong ver_cnt = query->ver_cnt;
1480 58932 : FD_COMPILER_MFENCE();
1481 58932 : ulong _ver_cnt = chain->ver_cnt;
1482 58932 : FD_COMPILER_MFENCE();
1483 58932 : return fd_int_if( ver_cnt==_ver_cnt, FD_MAP_SUCCESS, FD_MAP_ERR_AGAIN );
1484 58932 : }
1485 :
1486 : FD_FN_CONST static inline ulong
1487 1404 : MAP_(txn_key_max_max)( void ) {
1488 1404 : return (ULONG_MAX - sizeof(MAP_(txn_t)) - alignof(MAP_(txn_t)) + 1UL) / sizeof( MAP_(txn_private_info_t) );
1489 1404 : }
1490 :
1491 1404 : FD_FN_CONST static inline ulong MAP_(txn_align)( void ) { return alignof(MAP_(txn_t)); }
1492 :
1493 : FD_FN_CONST static inline ulong
1494 0 : MAP_(txn_footprint)( ulong key_max ) {
1495 0 : if( key_max > MAP_(txn_key_max_max)() ) return 0UL;
1496 0 : return sizeof(MAP_(txn_t)) + key_max*sizeof(MAP_(txn_private_info_t)); /* no overflow */
1497 0 : }
1498 :
1499 : static inline MAP_(txn_t) *
1500 : MAP_(txn_init)( void * mem,
1501 : MAP_(t) * join,
1502 1404 : ulong key_max ) {
1503 1404 : MAP_(txn_t) * txn = (MAP_(txn_t) *)mem;
1504 1404 : if( FD_UNLIKELY( (!mem ) |
1505 1404 : (!fd_ulong_is_aligned( (ulong)mem, MAP_(txn_align)() )) |
1506 1404 : (!join ) |
1507 1404 : (key_max > MAP_(txn_key_max_max)() ) ) ) return NULL;
1508 1404 : txn->map = join->map;
1509 1404 : txn->info_max = key_max; /* Worst case number of chains impacted by this transaction */
1510 1404 : txn->lock_cnt = 0UL;
1511 1404 : txn->spec_cnt = 0UL;
1512 1404 : return txn;
1513 1404 : }
1514 :
1515 1404 : FD_FN_CONST static inline void * MAP_(txn_fini)( MAP_(txn_t) * txn ) { return (void *)txn; }
1516 :
1517 : FD_FN_PURE static inline ulong
1518 : MAP_(iter_chain_idx)( MAP_(t) const * join,
1519 0 : MAP_KEY_T const * key ) {
1520 0 : MAP_(shmem_t) const * map = join->map;
1521 0 : return MAP_(key_hash)( key, map->seed ) & (map->chain_cnt-1UL);
1522 0 : }
1523 :
1524 : FD_FN_PURE static inline MAP_(iter_t)
1525 : MAP_(iter)( MAP_(t) const * join,
1526 30597028 : ulong chain_idx ) {
1527 : /* FIXME: consider iter = {NULL,NULL} if chain_idx >= join->map->chain_cnt? */
1528 30597028 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( join->map, 0UL ) + chain_idx;
1529 30597028 : MAP_(iter_t) iter;
1530 30597028 : iter.ele = join->ele;
1531 30597028 : iter.ele_idx = MAP_(private_idx)( chain->head_cidx );
1532 30597028 : return iter;
1533 30597028 : }
1534 :
1535 30707621 : FD_FN_CONST static inline int MAP_(iter_done)( MAP_(iter_t) iter ) { return MAP_(private_idx_is_null)( iter.ele_idx ); }
1536 :
1537 : __attribute__((warn_unused_result))
1538 : FD_FN_PURE static inline MAP_(iter_t)
1539 0 : MAP_(iter_next)( MAP_(iter_t) iter ) {
1540 0 : MAP_ELE_T const * ele = iter.ele + iter.ele_idx;
1541 0 : iter.ele_idx = MAP_(private_idx)( ele->MAP_NEXT );
1542 0 : return iter;
1543 0 : }
1544 :
1545 : FD_FN_CONST static inline MAP_ELE_T *
1546 160234 : MAP_(iter_ele)( MAP_(iter_t) iter ) {
1547 160234 : return (MAP_ELE_T *)(iter.ele + iter.ele_idx);
1548 160234 : }
1549 :
1550 : FD_FN_CONST static inline MAP_ELE_T const *
1551 0 : MAP_(iter_ele_const)( MAP_(iter_t) iter ) {
1552 0 : return iter.ele + iter.ele_idx;
1553 0 : }
1554 :
1555 : MAP_STATIC void * MAP_(new) ( void * shmem, ulong chain_cnt, ulong seed );
1556 : MAP_STATIC MAP_(t) * MAP_(join) ( void * ljoin, void * shmap, void * shele, ulong ele_max );
1557 : MAP_STATIC void * MAP_(leave) ( MAP_(t) * join );
1558 : MAP_STATIC void * MAP_(delete)( void * shmap );
1559 :
1560 : MAP_STATIC void
1561 : MAP_(hint)( MAP_(t) const * join,
1562 : MAP_KEY_T const * key,
1563 : MAP_(query_t) * query,
1564 : int flags );
1565 :
1566 : MAP_STATIC int MAP_(insert)( MAP_(t) * join, MAP_ELE_T * ele, int flags );
1567 :
1568 : MAP_STATIC int
1569 : MAP_(remove)( MAP_(t) * join,
1570 : MAP_KEY_T const * key,
1571 : MAP_ELE_T const * sentinel,
1572 : MAP_(query_t) * query,
1573 : int flags );
1574 :
1575 : MAP_STATIC int
1576 : MAP_(modify_try)( MAP_(t) * join,
1577 : MAP_KEY_T const * key,
1578 : MAP_ELE_T * sentinel,
1579 : MAP_(query_t) * query,
1580 : int flags );
1581 :
1582 : MAP_STATIC int
1583 : MAP_(query_try)( MAP_(t) const * join,
1584 : MAP_KEY_T const * key,
1585 : MAP_ELE_T const * sentinel,
1586 : MAP_(query_t) * query,
1587 : int flags );
1588 :
1589 : MAP_STATIC int MAP_(txn_add)( MAP_(txn_t) * txn, MAP_KEY_T const * key, int lock );
1590 :
1591 : MAP_STATIC int MAP_(txn_try)( MAP_(txn_t) * txn, int flags );
1592 :
1593 : static inline void
1594 : MAP_(txn_hint)( MAP_(t) const * join,
1595 : MAP_KEY_T const * key,
1596 : MAP_(query_t) * query,
1597 0 : int flags ) {
1598 0 : MAP_(hint)( join, key, query, flags );
1599 0 : }
1600 :
1601 : MAP_STATIC int
1602 : MAP_(txn_insert)( MAP_(t) * join,
1603 : MAP_ELE_T * ele );
1604 :
1605 : MAP_STATIC int
1606 : MAP_(txn_remove)( MAP_(t) * join,
1607 : MAP_KEY_T const * key,
1608 : MAP_ELE_T const * sentinel,
1609 : MAP_(query_t) * query,
1610 : int flags );
1611 :
1612 : MAP_STATIC int
1613 : MAP_(txn_modify)( MAP_(t) * join,
1614 : MAP_KEY_T const * key,
1615 : MAP_ELE_T * sentinel,
1616 : MAP_(query_t) * query,
1617 : int flags );
1618 :
1619 : static inline int
1620 : MAP_(txn_query)( MAP_(t) const * join,
1621 : MAP_KEY_T const * key,
1622 : MAP_ELE_T const * sentinel,
1623 : MAP_(query_t) * query,
1624 1404 : int flags ) {
1625 1404 : return MAP_(txn_modify)( (MAP_(t) *)join, key, (MAP_ELE_T *)sentinel, query, flags & (~FD_MAP_FLAG_ADAPTIVE) );
1626 1404 : }
1627 :
1628 : MAP_STATIC int MAP_(txn_test)( MAP_(txn_t) * txn );
1629 :
1630 : MAP_STATIC int
1631 : MAP_(iter_lock)( MAP_(t) * join,
1632 : ulong * lock_seq,
1633 : ulong lock_cnt,
1634 : int flags );
1635 :
1636 : MAP_STATIC void
1637 : MAP_(iter_unlock)( MAP_(t) * join,
1638 : ulong const * lock_seq,
1639 : ulong lock_cnt );
1640 :
1641 : MAP_STATIC void MAP_(reset)( MAP_(t) * join );
1642 :
1643 : MAP_STATIC int MAP_(verify)( MAP_(t) const * join );
1644 :
1645 : MAP_STATIC FD_FN_CONST char const * MAP_(strerror)( int err );
1646 :
1647 : FD_PROTOTYPES_END
1648 :
1649 : #endif
1650 :
1651 : #if MAP_IMPL_STYLE!=1 /* need implementations (assumes header already included) */
1652 :
1653 : #include "../log/fd_log.h" /* Used by constructors and verify (FIXME: Consider making a compile time option) */
1654 :
1655 : /* MAP_CRIT_{BEGIN,BLOCKED,END} handle virtually all atomic boilerplate
1656 : for operations that require modifying a map chain's structure or
1657 : elements managed by that chain. Usage:
1658 :
1659 : MAP_CRIT( chain, blocking ) {
1660 :
1661 : ... At this point, we have a lock on the chain and the "ulong"
1662 : ... ver_cnt contains the chain's versioned count just before we
1663 : ... took the lock. The "int" retain_lock is zero.
1664 : ...
1665 : ... Do locked operations on the map chain here
1666 : ...
1667 : ... On exiting this block, if retain_lock is non-zero, we resume
1668 : ... execution immediately after MAP_CRIT_END. This is used for
1669 : ... "try" style operations where a "test" operation is done to
1670 : ... unlock the chain after the caller does their try/test work.
1671 : ... Otherwise, we will update the version number, unlock the
1672 : ... chain and then resume execution after MAP_CRIT_END.
1673 : ...
1674 : ... Because compiler memory fences are done just before entering
1675 : ... and after exiting this block, there is typically no need to
1676 : ... use any atomics / volatile / fencing here. That is, we can
1677 : ... just write "normal" code on platforms where writes to memory
1678 : ... become visible to other threads in the order in which they
1679 : ... were issued in the machine code (e.g. x86) as the version
1680 : ... update and unlock writes are after the changes done here
1681 : ... and others will not proceed until they see the new version
1682 : ... and unlock. YMMV for non-x86 platforms (probably need
1683 : ... additional hardware store fences in these macros).
1684 : ...
1685 : ... It is safe to use "break" and/or "continue" within this
1686 : ... block. The overall MAP_CRIT will exit with the appropriate
1687 : ... compiler fencing, version update and unlocking and then
1688 : ... execution will resume immediately after MAP_CRIT_END.
1689 : ...
1690 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1691 : ...
1692 : ... IMPORTANT SAFETY TIP! OPERATIONS THAT CHANGE THE CHAIN
1693 : ... ELEMENT COUNT SHOULD UPDATE VER_CNT's COUNT WHILE HOLDING
1694 : ... THE VERSION CONSTANT.
1695 :
1696 : } MAP_CRIT_BLOCKED {
1697 :
1698 : ... At this point, somebody else had a lock on the chain when we
1699 : ... tried to take the lock.
1700 : ...
1701 : ... Handle blocked here.
1702 : ...
1703 : ... On exiting this block, if blocking was zero in MAP_CRIT, we
1704 : ... will resume execution immediately after MAP_CRIT_END. If
1705 : ... blocking was non-zero, we will resume execution immediately
1706 : ... before MAP_CRIT (e.g. we will retry again after a short spin
1707 : ... pause). Similar considerations to the above for compiler
1708 : ... memory fences, "break" and "continue". As we do not have the
1709 : ... lock here, retain_lock is neither relevant nor available.
1710 : ...
1711 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1712 :
1713 : } MAP_CRIT_END; */
1714 :
1715 400808 : #define MAP_CRIT(c,b) do { \
1716 400808 : ulong volatile * _vc = (ulong volatile *)&(c)->ver_cnt; \
1717 400808 : int _b = (b); \
1718 400808 : int retain_lock = 0; \
1719 400808 : for(;;) { \
1720 400631 : ulong ver_cnt = *_vc; \
1721 400631 : /* use a test-and-test-and-set style to reduce atomic contention */ \
1722 400701 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1723 400701 : ver_cnt = MAP_(private_fetch_and_or)( _vc, 1UL<<MAP_CNT_WIDTH ); \
1724 404292 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1725 404292 : FD_COMPILER_MFENCE(); \
1726 404292 : do
1727 :
1728 : #define MAP_CRIT_BLOCKED \
1729 404292 : while(0); \
1730 404292 : FD_COMPILER_MFENCE(); \
1731 403637 : if( !retain_lock ) *_vc = ver_cnt+(2UL<<MAP_CNT_WIDTH); /* likely compile time */ \
1732 403637 : FD_COMPILER_MFENCE(); \
1733 215798 : break; \
1734 404292 : } \
1735 400701 : } \
1736 >1844*10^16 : FD_COMPILER_MFENCE(); \
1737 >1844*10^16 : do
1738 :
1739 : #define MAP_CRIT_END \
1740 >1844*10^16 : while(0); \
1741 >1844*10^16 : FD_COMPILER_MFENCE(); \
1742 >1844*10^16 : if( !_b ) break; /* likely compile time */ \
1743 >1844*10^16 : FD_SPIN_PAUSE(); \
1744 >1844*10^16 : } \
1745 400808 : } while(0)
1746 :
1747 : MAP_STATIC void *
1748 : MAP_(new)( void * shmem,
1749 : ulong chain_cnt,
1750 36 : ulong seed ) {
1751 :
1752 36 : if( FD_UNLIKELY( !shmem ) ) {
1753 0 : FD_LOG_WARNING(( "NULL shmem" ));
1754 0 : return NULL;
1755 0 : }
1756 :
1757 36 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, MAP_(align)() ) ) ) {
1758 0 : FD_LOG_WARNING(( "misaligned shmem" ));
1759 0 : return NULL;
1760 0 : }
1761 :
1762 36 : ulong footprint = MAP_(footprint)( chain_cnt );
1763 36 : if( FD_UNLIKELY( !footprint ) ) {
1764 0 : FD_LOG_WARNING(( "bad footprint" ));
1765 0 : return NULL;
1766 0 : }
1767 :
1768 : /* seed is arbitrary */
1769 :
1770 : /* Init the metadata */
1771 :
1772 36 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmem;
1773 :
1774 36 : map->seed = seed;
1775 36 : map->chain_cnt = chain_cnt;
1776 :
1777 : /* Set all the chains to version 0 and empty */
1778 :
1779 36 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
1780 12420 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
1781 12384 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( 0UL, 0UL );
1782 12384 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
1783 12384 : }
1784 :
1785 36 : FD_COMPILER_MFENCE();
1786 36 : map->magic = MAP_MAGIC;
1787 36 : FD_COMPILER_MFENCE();
1788 :
1789 36 : return shmem;
1790 36 : }
1791 :
1792 : MAP_STATIC MAP_(t) *
1793 : MAP_(join)( void * ljoin,
1794 : void * shmap,
1795 : void * shele,
1796 84 : ulong ele_max ) {
1797 84 : MAP_(t) * join = (MAP_(t) *)ljoin;
1798 84 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1799 84 : MAP_ELE_T * ele = (MAP_ELE_T *)shele;
1800 :
1801 84 : if( FD_UNLIKELY( !join ) ) {
1802 0 : FD_LOG_WARNING(( "NULL ljoin" ));
1803 0 : return NULL;
1804 0 : }
1805 :
1806 84 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) ) ) {
1807 0 : FD_LOG_WARNING(( "misaligned ljoin" ));
1808 0 : return NULL;
1809 0 : }
1810 :
1811 84 : if( FD_UNLIKELY( !map ) ) {
1812 0 : FD_LOG_WARNING(( "NULL shmap" ));
1813 0 : return NULL;
1814 0 : }
1815 :
1816 84 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1817 0 : FD_LOG_WARNING(( "misaligned shmap" ));
1818 0 : return NULL;
1819 0 : }
1820 :
1821 84 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1822 0 : FD_LOG_WARNING(( "bad magic" ));
1823 0 : return NULL;
1824 0 : }
1825 :
1826 84 : if( FD_UNLIKELY( (!ele) & (!!ele_max) ) ) {
1827 0 : FD_LOG_WARNING(( "NULL shele" ));
1828 0 : return NULL;
1829 0 : }
1830 :
1831 84 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) ) ) {
1832 0 : FD_LOG_WARNING(( "misaligned shele" ));
1833 0 : return NULL;
1834 0 : }
1835 :
1836 84 : if( FD_UNLIKELY( ele_max > MAP_(ele_max_max)() ) ) {
1837 0 : FD_LOG_WARNING(( "ele_max greater than ele_max_max" ));
1838 0 : return NULL;
1839 0 : }
1840 :
1841 84 : join->map = map;
1842 84 : join->ele = ele;
1843 84 : join->ele_max = ele_max;
1844 :
1845 84 : return join;
1846 84 : }
1847 :
1848 : MAP_STATIC void *
1849 24 : MAP_(leave)( MAP_(t) * join ) {
1850 :
1851 24 : if( FD_UNLIKELY( !join ) ) {
1852 0 : FD_LOG_WARNING(( "NULL join" ));
1853 0 : return NULL;
1854 0 : }
1855 :
1856 24 : return (void *)join;
1857 24 : }
1858 :
1859 : MAP_STATIC void *
1860 0 : MAP_(delete)( void * shmap ) {
1861 0 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1862 :
1863 0 : if( FD_UNLIKELY( !map ) ) {
1864 0 : FD_LOG_WARNING(( "NULL shmap" ));
1865 0 : return NULL;
1866 0 : }
1867 :
1868 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1869 0 : FD_LOG_WARNING(( "misaligned shmap" ));
1870 0 : return NULL;
1871 0 : }
1872 :
1873 0 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1874 0 : FD_LOG_WARNING(( "bad magic" ));
1875 0 : return NULL;
1876 0 : }
1877 :
1878 0 : FD_COMPILER_MFENCE();
1879 0 : map->magic = 0UL;
1880 0 : FD_COMPILER_MFENCE();
1881 :
1882 0 : return (void *)map;
1883 0 : }
1884 :
1885 : MAP_STATIC int
1886 : MAP_(insert)( MAP_(t) * join,
1887 : MAP_ELE_T * ele,
1888 184291 : int flags ) {
1889 :
1890 : /* Determine the element index (fastest if ele are power-of-two) and
1891 : the chain that should hold ele */
1892 :
1893 184291 : ulong ele_idx = (ulong)(ele - join->ele);
1894 184291 : if( FD_UNLIKELY( ele_idx>=join->ele_max ) ) return FD_MAP_ERR_INVAL;
1895 :
1896 184291 : MAP_(shmem_t) * map = join->map;
1897 :
1898 184291 : ulong memo = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
1899 184291 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
1900 :
1901 : /* Insert element at the head of chain. If chain is already locked,
1902 : signal to try again later. */
1903 :
1904 184291 : int err;
1905 :
1906 370538 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1907 186435 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1908 186435 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1909 :
1910 186435 : ele->MAP_NEXT = chain->head_cidx;
1911 : # if MAP_MEMOIZE
1912 0 : ele->MAP_MEMO = memo;
1913 : # endif
1914 186435 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
1915 186435 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL ); /* version updated on exit */
1916 186435 : err = FD_MAP_SUCCESS;
1917 :
1918 >1844*10^16 : } MAP_CRIT_BLOCKED {
1919 :
1920 >1844*10^16 : err = FD_MAP_ERR_AGAIN;
1921 :
1922 >1844*10^16 : } MAP_CRIT_END;
1923 :
1924 184291 : return err;
1925 184291 : }
1926 :
1927 : MAP_STATIC void
1928 : MAP_(hint)( MAP_(t) const * join,
1929 : MAP_KEY_T const * key,
1930 : MAP_(query_t) * query,
1931 0 : int flags ) {
1932 0 : MAP_(shmem_t) * map = join->map;
1933 0 : MAP_ELE_T * ele0 = join->ele;
1934 0 : ulong ele_max = join->ele_max;
1935 :
1936 0 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
1937 0 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
1938 :
1939 0 : if( FD_LIKELY( flags & FD_MAP_FLAG_PREFETCH_META ) ) FD_VOLATILE_CONST( chain->ver_cnt );
1940 0 : if( FD_LIKELY( flags & FD_MAP_FLAG_PREFETCH_DATA ) ) {
1941 0 : ulong ele_idx = MAP_(private_idx)( chain->head_cidx );
1942 0 : if( FD_LIKELY( ele_idx < ele_max ) ) FD_VOLATILE_CONST( ele0[ ele_idx ] );
1943 0 : }
1944 :
1945 0 : query->memo = memo;
1946 0 : }
1947 :
1948 : MAP_STATIC int
1949 : MAP_(remove)( MAP_(t) * join,
1950 : MAP_KEY_T const * key,
1951 : MAP_ELE_T const * sentinel,
1952 : MAP_(query_t) * query,
1953 215113 : int flags ) {
1954 :
1955 : /* Determine the chain that should hold key */
1956 :
1957 215113 : MAP_(shmem_t) * map = join->map;
1958 215113 : MAP_ELE_T * ele = join->ele;
1959 215113 : ulong ele_max = join->ele_max;
1960 :
1961 215113 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
1962 215113 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
1963 :
1964 : /* Find the key on the chain. If found, remove it. If not found,
1965 : corrupt or blocked, fail the operation. */
1966 :
1967 215113 : query->memo = memo;
1968 215113 : query->ele = (MAP_ELE_T *)sentinel;
1969 215113 : query->chain = chain;
1970 :
1971 215113 : int err;
1972 :
1973 431647 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1974 216453 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1975 216453 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1976 :
1977 216453 : query->ver_cnt = ver_cnt;
1978 :
1979 216453 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
1980 : # if MAP_PEDANTIC
1981 0 : FD_LOG_NOTICE(( "Corrupt map chain %p (memo %016lx): ele_cnt=%lu > ele_max=%lu", (void *)chain, memo, ele_cnt, ele_max ));
1982 : # else
1983 0 : err = FD_MAP_ERR_CORRUPT;
1984 : goto done;
1985 : # endif /* MAP_PEDANTIC */
1986 0 : }
1987 :
1988 216453 : MAP_IDX_T * cur = &chain->head_cidx;
1989 >1844*10^16 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
1990 215735 : ulong ele_idx = MAP_(private_idx)( *cur );
1991 215735 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
1992 : # if MAP_PEDANTIC
1993 0 : FD_LOG_CRIT(( "Corrupt MAP_NEXT pointer at node %p (memo %016lx, ele_rem=%lu, ele_cnt=%lu): map_next=%lu >= ele_max=%lu",
1994 : (void *)cur, memo, ele_rem, ele_cnt, ele_idx, ele_max ));
1995 : # else
1996 0 : err = FD_MAP_ERR_CORRUPT;
1997 : goto done;
1998 : # endif /* MAP_PEDANTIC */
1999 0 : }
2000 :
2001 215735 : if(
2002 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2003 0 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2004 0 : # endif
2005 215795 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2006 :
2007 215795 : *cur = ele[ ele_idx ].MAP_NEXT;
2008 215795 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL ); /* version updated on exit */
2009 215795 : query->ele = &ele[ ele_idx ];
2010 215795 : err = FD_MAP_SUCCESS;
2011 215795 : goto done;
2012 215795 : }
2013 :
2014 >1844*10^16 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2015 >1844*10^16 : }
2016 :
2017 : /* Key was not found */
2018 :
2019 658 : ulong ele_idx = MAP_(private_idx)( *cur );
2020 658 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
2021 : # if MAP_PEDANTIC
2022 0 : FD_LOG_CRIT(( "Corrupt map chain %p (memo %016lx, ele_cnt=%lu): Found element past chain: ele_idx=%lu",
2023 : (void *)chain, memo, ele_cnt, ele_idx ));
2024 : # else
2025 0 : err = FD_MAP_ERR_CORRUPT;
2026 : goto done;
2027 : # endif /* MAP_PEDANTIC */
2028 0 : }
2029 :
2030 658 : err = FD_MAP_ERR_KEY;
2031 :
2032 215798 : done: /* silly language restriction */;
2033 :
2034 >1844*10^16 : } MAP_CRIT_BLOCKED {
2035 :
2036 >1844*10^16 : query->ver_cnt = ver_cnt;
2037 >1844*10^16 : err = FD_MAP_ERR_AGAIN;
2038 :
2039 >1844*10^16 : } MAP_CRIT_END;
2040 :
2041 214458 : return err;
2042 215113 : }
2043 :
2044 : MAP_STATIC int
2045 : MAP_(modify_try)( MAP_(t) * join,
2046 : MAP_KEY_T const * key,
2047 : MAP_ELE_T * sentinel,
2048 : MAP_(query_t) * query,
2049 0 : int flags ) {
2050 :
2051 : /* Determine which chain might hold key */
2052 :
2053 0 : MAP_(shmem_t) * map = join->map;
2054 0 : MAP_ELE_T * ele = join->ele;
2055 0 : ulong ele_max = join->ele_max;
2056 :
2057 0 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2058 0 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2059 :
2060 : /* Search for the key on chain. If found, retain the chain lock
2061 : and return the found element. If not found, corrupt or blocked,
2062 : fail. */
2063 :
2064 0 : query->memo = memo;
2065 0 : query->ele = (MAP_ELE_T *)sentinel;
2066 0 : query->chain = chain;
2067 :
2068 0 : int err;
2069 :
2070 0 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
2071 :
2072 0 : query->ver_cnt = ver_cnt;
2073 :
2074 0 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2075 0 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
2076 : # if MAP_PEDANTIC
2077 0 : FD_LOG_NOTICE(( "Corrupt map chain %p (memo %016lx): ele_cnt=%lu > ele_max=%lu", (void *)chain, memo, ele_cnt, ele_max ));
2078 : # else
2079 0 : err = FD_MAP_ERR_CORRUPT;
2080 : goto done;
2081 : # endif /* MAP_PEDANTIC */
2082 0 : }
2083 :
2084 0 : MAP_IDX_T * cur = &chain->head_cidx;
2085 0 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
2086 0 : ulong ele_idx = MAP_(private_idx)( *cur );
2087 0 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
2088 : # if MAP_PEDANTIC
2089 0 : FD_LOG_CRIT(( "Corrupt MAP_NEXT pointer at node %p (memo %016lx, ele_rem=%lu, ele_cnt=%lu): map_next=%lu >= ele_max=%lu",
2090 : (void *)cur, memo, ele_rem, ele_cnt, ele_idx, ele_max ));
2091 : # else
2092 0 : err = FD_MAP_ERR_CORRUPT;
2093 : goto done;
2094 : # endif /* MAP_PEDANTIC */
2095 0 : }
2096 :
2097 0 : if(
2098 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2099 0 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2100 0 : # endif
2101 0 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2102 0 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
2103 0 : *cur = ele[ ele_idx ].MAP_NEXT;
2104 0 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
2105 0 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2106 0 : }
2107 0 : query->ele = &ele[ ele_idx ];
2108 0 : err = FD_MAP_SUCCESS;
2109 0 : retain_lock = 1;
2110 0 : goto done;
2111 0 : }
2112 :
2113 0 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2114 0 : }
2115 :
2116 0 : ulong ele_idx = MAP_(private_idx)( *cur );
2117 0 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
2118 : # if MAP_PEDANTIC
2119 0 : FD_LOG_CRIT(( "Corrupt map chain %p (memo %016lx, ele_cnt=%lu): Found element past chain: ele_idx=%lu",
2120 : (void *)chain, memo, ele_cnt, ele_idx ));
2121 : # else
2122 0 : err = FD_MAP_ERR_CORRUPT;
2123 : goto done;
2124 : # endif /* MAP_PEDANTIC */
2125 0 : }
2126 :
2127 0 : err = FD_MAP_ERR_KEY;
2128 :
2129 0 : done: /* silly language restriction */;
2130 :
2131 0 : } MAP_CRIT_BLOCKED {
2132 :
2133 0 : query->ver_cnt = ver_cnt;
2134 0 : err = FD_MAP_ERR_AGAIN;
2135 :
2136 0 : } MAP_CRIT_END;
2137 :
2138 0 : return err;
2139 0 : }
2140 :
2141 : MAP_STATIC int
2142 : MAP_(query_try)( MAP_(t) const * join,
2143 : MAP_KEY_T const * key,
2144 : MAP_ELE_T const * sentinel,
2145 : MAP_(query_t) * query,
2146 83858 : int flags ) {
2147 :
2148 : /* Determine which chain might hold key */
2149 :
2150 83858 : MAP_(shmem_t) const * map = join->map;
2151 83858 : MAP_ELE_T const * ele = join->ele;
2152 83858 : ulong ele_max = join->ele_max;
2153 :
2154 83858 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2155 83858 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, memo );
2156 :
2157 : /* Determine the version of the chain we are querying. Then
2158 : speculatively read and validate the number of elements on the chain
2159 : at that version. If the chain is locked, tell the user to try
2160 : again later. If the number of elements in the chain is invalid,
2161 : tell user the map is corrupt. */
2162 :
2163 83858 : ulong volatile const * _vc = &chain->ver_cnt;
2164 :
2165 83858 : FD_COMPILER_MFENCE();
2166 83858 : ulong then = *_vc;
2167 83858 : FD_COMPILER_MFENCE();
2168 :
2169 83858 : ulong ele_cnt = MAP_(private_vcnt_cnt)( then );
2170 :
2171 83858 : FD_COMPILER_MFENCE();
2172 83858 : ulong now = *_vc;
2173 83858 : FD_COMPILER_MFENCE();
2174 :
2175 83858 : query->memo = memo;
2176 83858 : query->ele = (MAP_ELE_T *) sentinel;
2177 83858 : query->chain = (MAP_(shmem_private_chain_t) *)chain;
2178 83858 : query->ver_cnt = then;
2179 :
2180 83858 : if( FD_UNLIKELY( (now!=then) | (!!(then & (1UL<<MAP_CNT_WIDTH))) ) ) return FD_MAP_ERR_AGAIN;
2181 83858 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) {
2182 : # if MAP_PEDANTIC
2183 0 : FD_LOG_NOTICE(( "Corrupt map chain %p (memo %016lx): ele_cnt=%lu > ele_max=%lu", (void *)chain, memo, ele_cnt, ele_max ));
2184 : # else
2185 0 : return FD_MAP_ERR_CORRUPT;
2186 : # endif /* MAP_PEDANTIC */
2187 0 : }
2188 :
2189 : /* Search the chain for key. Since we know the numer of elements on
2190 : the chain, we can bound this search to avoid corruption causing out
2191 : of bound reads, infinite loops and such. */
2192 :
2193 83858 : MAP_IDX_T const * cur = &chain->head_cidx;
2194 83912 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2195 :
2196 : /* Speculatively read the index of the chain, speculate if a valid
2197 : index and, if so, speculate if the chain element matches the
2198 : query. Note that this assumes element keys have a lifetime of at
2199 : least that of the element. A sufficient (but not a necessary,
2200 : see rant) condition for this is that key is a plain-old-data
2201 : fields in the element. */
2202 :
2203 28813 : FD_COMPILER_MFENCE();
2204 28813 : ulong ele_idx = MAP_(private_idx)( *cur );
2205 28813 : FD_COMPILER_MFENCE();
2206 :
2207 28813 : int corrupt = (ele_idx>=ele_max);
2208 28813 : int found = ( FD_LIKELY( !corrupt ) &&
2209 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2210 0 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2211 : # endif
2212 28813 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) ? 1 : 0;
2213 :
2214 : /* Validate the speculation. If validation fails (e.g. the chain
2215 : was modified behind our back), tell the user to try again later.
2216 : If the element index was not valid, tell the user the map has
2217 : been corrupted. If key was found at element, tell the user they
2218 : can speculate element ele_idx contains key. */
2219 :
2220 28813 : FD_COMPILER_MFENCE();
2221 28813 : now = *_vc;
2222 28813 : FD_COMPILER_MFENCE();
2223 :
2224 28813 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2225 28813 : if( FD_UNLIKELY( corrupt ) ) {
2226 : # if MAP_PEDANTIC
2227 0 : FD_LOG_CRIT(( "Corrupt MAP_NEXT pointer at node %p (memo %016lx, ele_rem=%lu, ele_cnt=%lu): map_next=%lu >= ele_max=%lu",
2228 : (void *)cur, memo, ele_rem, ele_cnt, ele_idx, ele_max ));
2229 : # else
2230 0 : return FD_MAP_ERR_CORRUPT;
2231 : # endif /* MAP_PEDANTIC */
2232 0 : }
2233 :
2234 28813 : if( FD_LIKELY( found ) ) { /* Optimize for found */
2235 28759 : query->ele = (MAP_ELE_T *)&ele[ ele_idx ];
2236 28759 : return FD_MAP_SUCCESS;
2237 28759 : }
2238 :
2239 : /* The chain element didn't hold the key ... move to next element */
2240 :
2241 54 : cur = &ele[ ele_idx ].MAP_NEXT;
2242 54 : }
2243 :
2244 : /* At this point, the chain didn't hold the key. We could probably
2245 : return immediately but we speculative read the tail pointer,
2246 : validate it as an additional integrity check. If these checks
2247 : pass, we are confident the whole chain looked valid and did not
2248 : hold key between now and then. */
2249 :
2250 55099 : ulong ele_idx = MAP_(private_idx)( *cur );
2251 :
2252 55099 : FD_COMPILER_MFENCE();
2253 55099 : now = *_vc;
2254 55099 : FD_COMPILER_MFENCE();
2255 :
2256 55099 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2257 55099 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) {
2258 : # if MAP_PEDANTIC
2259 0 : FD_LOG_CRIT(( "Corrupt map chain %p (memo %016lx, ele_cnt=%lu): Found element past chain: ele_idx=%lu",
2260 : (void *)chain, memo, ele_cnt, ele_idx ));
2261 : # else
2262 0 : return FD_MAP_ERR_CORRUPT;
2263 : # endif /* MAP_PEDANTIC */
2264 0 : }
2265 :
2266 55099 : return FD_MAP_ERR_KEY;
2267 55099 : }
2268 :
2269 : /* Note: txn_add is currently optimized for reasonably small number
2270 : of keys per transaction. For a huge number of transaction keys (e.g.
2271 : an iterator over all keys for all keys), probably should use the
2272 : iterator API. For a moderate number of transaction keys, probably
2273 : should consider data structures where set insert/remove/test are
2274 : sublinear time. Similarly, if MAP_KEY_HASH is costly, might be
2275 : useful to stash the key hashes in the transaction, memoize it in the
2276 : elements, etc. */
2277 :
2278 : MAP_STATIC int
2279 : MAP_(txn_add)( MAP_(txn_t) * txn,
2280 : MAP_KEY_T const * key,
2281 1404 : int lock ) {
2282 :
2283 : /* Unpack txn fields */
2284 :
2285 1404 : MAP_(shmem_t) * map = txn->map;
2286 1404 : ulong info_max = txn->info_max;
2287 1404 : ulong lock_cnt = txn->lock_cnt;
2288 1404 : ulong spec_cnt = txn->spec_cnt;
2289 :
2290 1404 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2291 1404 : MAP_(txn_private_info_t) * spec_info = lock_info + (info_max - spec_cnt);
2292 :
2293 : /* Determine which chain manages this key */
2294 :
2295 1404 : ulong memo = MAP_(key_hash)( key, map->seed );
2296 1404 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2297 :
2298 : /* If this chain already needs to be locked for this transaction,
2299 : nothing to do. */
2300 :
2301 1404 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2302 0 : if( FD_UNLIKELY( chain==lock_info[ lock_idx ].chain ) ) return FD_MAP_SUCCESS;
2303 :
2304 1404 : if( FD_UNLIKELY( !lock ) ) { /* optimize for locked key, possible compile time */
2305 :
2306 : /* At this point, key is used speculatively by the transaction and
2307 : its managing chain isn't in the locked set. If this chain is
2308 : already in the speculative set, nothing to do. */
2309 :
2310 0 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2311 0 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) return FD_MAP_SUCCESS;
2312 :
2313 : /* Add the chain to the speculative set. If we don't have any room,
2314 : fail. */
2315 :
2316 0 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2317 0 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2318 0 : spec_info[-1].chain = chain;
2319 0 : txn->spec_cnt = spec_cnt + 1UL;
2320 :
2321 1404 : } else {
2322 :
2323 : /* At this point, key is used locked by the transaction and its
2324 : managing chain isn't in the locked set. If this chain is
2325 : currently in the speculative set, move it to the locked
2326 : set. */
2327 :
2328 1404 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2329 0 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) {
2330 0 : spec_info[ spec_idx ].chain = spec_info[ 0 ].chain; /* Fill the hole at spec_idx, making a hole at 0 */
2331 0 : lock_info[ lock_cnt ].chain = chain; /* Either uses unused entry or fills hole at 0 */
2332 0 : txn->spec_cnt = spec_cnt - 1UL;
2333 0 : txn->lock_cnt = lock_cnt + 1UL;
2334 0 : return FD_MAP_SUCCESS;
2335 0 : }
2336 :
2337 : /* Add the chain to the locked set. If we don't have any room,
2338 : fail. */
2339 :
2340 1404 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2341 1404 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2342 1404 : lock_info[lock_cnt].chain = chain;
2343 1404 : txn->lock_cnt = lock_cnt + 1UL;
2344 :
2345 1404 : }
2346 :
2347 1404 : return FD_MAP_SUCCESS;
2348 1404 : }
2349 :
2350 : MAP_STATIC int
2351 : MAP_(txn_try)( MAP_(txn_t) * txn,
2352 1404 : int flags ) {
2353 1404 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2354 :
2355 : /* Unpack txn fields */
2356 :
2357 1404 : ulong info_max = txn->info_max;
2358 1404 : ulong lock_cnt = txn->lock_cnt;
2359 1404 : ulong spec_cnt = txn->spec_cnt;
2360 :
2361 1404 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2362 1404 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2363 :
2364 1404 : ulong backoff_exp = (1UL<<32); /* See iter_lock for details */
2365 1404 : ulong backoff_seed = ((ulong)(uint)flags)>>2;
2366 :
2367 1404 : int err;
2368 :
2369 1404 : for(;; ) {
2370 :
2371 1404 : err = FD_MAP_SUCCESS;
2372 :
2373 1404 : FD_COMPILER_MFENCE();
2374 :
2375 : /* Get the chain versions for all keys in the speculative set.
2376 : If any are locked, set AGAIN if any are locked. */
2377 :
2378 1404 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2379 0 : ulong ver_cnt = spec_info[ spec_idx ].chain->ver_cnt;
2380 0 : if( FD_UNLIKELY( ver_cnt & (1UL<<MAP_CNT_WIDTH) ) ) { /* Already locked */
2381 0 : err = FD_MAP_ERR_AGAIN;
2382 0 : break;
2383 0 : }
2384 0 : spec_info[ spec_idx ].ver_cnt = ver_cnt;
2385 0 : }
2386 :
2387 1404 : if( FD_LIKELY( !err ) ) {
2388 :
2389 : /* At this point, all the chains we are speculating on were
2390 : unlocked and we have recorded their versions. Try to lock
2391 : all the chains for the locked key. */
2392 : /* FIXME: consider reordering like iter_lock? */
2393 :
2394 2808 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
2395 :
2396 2808 : MAP_CRIT( lock_info[ lock_idx ].chain, 0 ) { /* non-blocking */
2397 :
2398 : /* Got the lock ... save the version and retain the lock for
2399 : test. */
2400 :
2401 1404 : lock_info[ lock_idx ].ver_cnt = ver_cnt;
2402 1404 : retain_lock = 1;
2403 :
2404 1404 : } MAP_CRIT_BLOCKED {
2405 :
2406 : /* We hit contention for this lock. Unlock the chains that
2407 : we already locked to prevent possible deadlock (see
2408 : iter_lock) */
2409 :
2410 0 : for( ulong unlock_idx=0UL; unlock_idx<lock_idx; unlock_idx++ )
2411 0 : lock_info[ unlock_idx ].chain->ver_cnt = lock_info[ unlock_idx ].ver_cnt + (2UL<<MAP_CNT_WIDTH);
2412 :
2413 0 : err = FD_MAP_ERR_AGAIN;
2414 :
2415 0 : } MAP_CRIT_END;
2416 :
2417 1404 : if( FD_UNLIKELY( err ) ) break;
2418 :
2419 1404 : }
2420 :
2421 1404 : }
2422 :
2423 1404 : FD_COMPILER_MFENCE();
2424 :
2425 1404 : if( FD_LIKELY( (!err) | non_blocking ) ) break;
2426 :
2427 : /* At this point, we hit contention and are blocking (need to try
2428 : again). Do a random backoff (see iter_lock for details). */
2429 :
2430 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt+spec_cnt, (1UL<<16)-1UL )*backoff_exp) >> 16, (1UL<<32)-1UL );
2431 0 : backoff_exp = fd_ulong_min( backoff_exp + (backoff_exp>>2) + (backoff_exp>>4), (1UL<<48)-1UL );
2432 0 : MAP_(backoff)( scale, backoff_seed );
2433 0 : }
2434 :
2435 : /* At this point, if we don't have an error, we have the chain
2436 : versions for txn keys used speculatively and they were unlocked and
2437 : we have locks on the chains for txn keys used locked. Otherwise,
2438 : this is a non-blocking call and we return AGAIN. */
2439 :
2440 1404 : return err;
2441 1404 : }
2442 :
2443 : MAP_STATIC int
2444 1404 : MAP_(txn_test)( MAP_(txn_t) * txn ) {
2445 :
2446 : /* Unpack txn fields */
2447 :
2448 1404 : ulong info_max = txn->info_max;
2449 1404 : ulong lock_cnt = txn->lock_cnt;
2450 1404 : ulong spec_cnt = txn->spec_cnt;
2451 :
2452 1404 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2453 1404 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2454 :
2455 : /* Unlock all chains locked for this transaction. Then test if any
2456 : keys used speculatively could have changed in locking / trying /
2457 : unlocking. If so, tell user to retry later. */
2458 :
2459 1404 : int err = FD_MAP_SUCCESS;
2460 :
2461 1404 : FD_COMPILER_MFENCE();
2462 :
2463 2808 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_info[ lock_idx ].chain->ver_cnt += (1UL<<MAP_CNT_WIDTH);
2464 :
2465 1404 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2466 0 : MAP_(shmem_private_chain_t) const * chain = spec_info[ spec_idx ].chain;
2467 0 : ulong ver_cnt = spec_info[ spec_idx ].ver_cnt;
2468 0 : if( FD_UNLIKELY( chain->ver_cnt!=ver_cnt ) ) {
2469 0 : err = FD_MAP_ERR_AGAIN;
2470 0 : break;
2471 0 : }
2472 0 : }
2473 :
2474 1404 : FD_COMPILER_MFENCE();
2475 :
2476 1404 : return err;
2477 1404 : }
2478 :
2479 : MAP_STATIC int
2480 : MAP_(txn_insert)( MAP_(t) * join,
2481 29793 : MAP_ELE_T * ele ) {
2482 :
2483 : /* Determine the element index (fastest if ele are power-of-two) and
2484 : the chain that should hold ele */
2485 :
2486 29793 : MAP_(shmem_t) * map = join->map;
2487 29793 : ulong ele_max = join->ele_max;
2488 :
2489 29793 : ulong ele_idx = (ulong)(ele - join->ele);
2490 29793 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_INVAL;
2491 :
2492 29793 : ulong memo = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
2493 29793 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2494 :
2495 : /* Insert ele_idx at head of chain. */
2496 :
2497 29793 : ulong ver_cnt = chain->ver_cnt;
2498 29793 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2499 29793 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2500 :
2501 29793 : ele->MAP_NEXT = chain->head_cidx;
2502 : # if MAP_MEMOIZE
2503 0 : ele->MAP_MEMO = memo;
2504 : # endif
2505 29793 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2506 29793 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL );
2507 :
2508 29793 : return FD_MAP_SUCCESS;
2509 29793 : }
2510 :
2511 : MAP_STATIC int
2512 : MAP_(txn_remove)( MAP_(t) * join,
2513 : MAP_KEY_T const * key,
2514 : MAP_ELE_T const * sentinel,
2515 : MAP_(query_t) * query,
2516 0 : int flags ) {
2517 :
2518 : /* Determine the chain that should hold key */
2519 :
2520 0 : MAP_(shmem_t) * map = join->map;
2521 0 : MAP_ELE_T * ele = join->ele;
2522 0 : ulong ele_max = join->ele_max;
2523 :
2524 0 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2525 0 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2526 :
2527 : /* Find the key on the chain and remove it */
2528 :
2529 0 : ulong ver_cnt = chain->ver_cnt;
2530 0 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2531 0 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2532 :
2533 0 : query->memo = memo;
2534 0 : query->ele = (MAP_ELE_T *)sentinel;
2535 0 : query->chain = chain;
2536 0 : query->ver_cnt = ver_cnt;
2537 :
2538 0 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
2539 : # if MAP_PEDANTIC
2540 0 : FD_LOG_NOTICE(( "Corrupt map chain %p (memo %016lx): ele_cnt=%lu > ele_max=%lu", (void *)chain, memo, ele_cnt, ele_max ));
2541 : # else
2542 0 : return FD_MAP_ERR_CORRUPT;
2543 : # endif /* MAP_PEDANTIC */
2544 0 : }
2545 :
2546 0 : MAP_IDX_T * cur = &chain->head_cidx;
2547 0 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
2548 0 : ulong ele_idx = MAP_(private_idx)( *cur );
2549 0 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
2550 : # if MAP_PEDANTIC
2551 0 : FD_LOG_CRIT(( "Corrupt MAP_NEXT pointer at node %p (memo %016lx, ele_rem=%lu, ele_cnt=%lu): map_next=%lu >= ele_max=%lu",
2552 : (void *)cur, memo, ele_rem, ele_cnt, ele_idx, ele_max ));
2553 : # else
2554 0 : return FD_MAP_ERR_CORRUPT;
2555 : # endif /* MAP_PEDANTIC */
2556 0 : }
2557 :
2558 0 : if(
2559 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2560 0 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2561 0 : # endif
2562 0 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2563 0 : *cur = ele[ ele_idx ].MAP_NEXT;
2564 0 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL );
2565 0 : query->ele = &ele[ ele_idx ];
2566 0 : return FD_MAP_SUCCESS;
2567 0 : }
2568 :
2569 0 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2570 0 : }
2571 :
2572 0 : ulong ele_idx = MAP_(private_idx)( *cur );
2573 0 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not found */
2574 : # if MAP_PEDANTIC
2575 0 : FD_LOG_CRIT(( "Corrupt map chain %p (memo %016lx, ele_cnt=%lu): Found element past chain: ele_idx=%lu",
2576 : (void *)chain, memo, ele_cnt, ele_idx ));
2577 : # else
2578 0 : return FD_MAP_ERR_CORRUPT;
2579 : # endif /* MAP_PEDANTIC */
2580 0 : }
2581 0 : return FD_MAP_ERR_KEY;
2582 0 : }
2583 :
2584 : MAP_STATIC int
2585 : MAP_(txn_modify)( MAP_(t) * join,
2586 : MAP_KEY_T const * key,
2587 : MAP_ELE_T * sentinel,
2588 : MAP_(query_t) * query,
2589 1404 : int flags ) {
2590 :
2591 : /* Determine which chain might hold key */
2592 :
2593 1404 : MAP_(shmem_t) * map = join->map;
2594 1404 : MAP_ELE_T * ele = join->ele;
2595 1404 : ulong ele_max = join->ele_max;
2596 :
2597 1404 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2598 1404 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2599 :
2600 : /* Search the chain for key */
2601 :
2602 1404 : ulong ver_cnt = chain->ver_cnt;
2603 1404 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2604 :
2605 1404 : query->memo = memo;
2606 1404 : query->ele = sentinel;
2607 1404 : query->chain = chain;
2608 1404 : query->ver_cnt = ver_cnt;
2609 :
2610 1404 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
2611 : # if MAP_PEDANTIC
2612 0 : FD_LOG_NOTICE(( "Corrupt map chain %p (memo %016lx): ele_cnt=%lu > ele_max=%lu", (void *)chain, memo, ele_cnt, ele_max ));
2613 : # else
2614 0 : return FD_MAP_ERR_CORRUPT;
2615 : # endif /* MAP_PEDANTIC */
2616 0 : }
2617 :
2618 1404 : MAP_IDX_T * cur = &chain->head_cidx;
2619 1404 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2620 0 : ulong ele_idx = MAP_(private_idx)( *cur );
2621 :
2622 0 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
2623 : # if MAP_PEDANTIC
2624 0 : FD_LOG_CRIT(( "Corrupt MAP_NEXT pointer at node %p (memo %016lx, ele_rem=%lu, ele_cnt=%lu): map_next=%lu >= ele_max=%lu",
2625 : (void *)cur, memo, ele_rem, ele_cnt, ele_idx, ele_max ));
2626 : # else
2627 0 : return FD_MAP_ERR_CORRUPT;
2628 : # endif /* MAP_PEDANTIC */
2629 0 : }
2630 :
2631 0 : if(
2632 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2633 0 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2634 0 : # endif
2635 0 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2636 0 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
2637 0 : *cur = ele[ ele_idx ].MAP_NEXT;
2638 0 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
2639 0 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2640 0 : }
2641 0 : query->ele = &ele[ ele_idx ];
2642 0 : return FD_MAP_SUCCESS;
2643 0 : }
2644 :
2645 0 : cur = &ele[ ele_idx ].MAP_NEXT;
2646 0 : }
2647 :
2648 1404 : ulong ele_idx = MAP_(private_idx)( *cur );
2649 1404 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
2650 : # if MAP_PEDANTIC
2651 0 : FD_LOG_CRIT(( "Corrupt map chain %p (memo %016lx, ele_cnt=%lu): Found element past chain: ele_idx=%lu",
2652 : (void *)chain, memo, ele_cnt, ele_idx ));
2653 : # else
2654 0 : return FD_MAP_ERR_CORRUPT;
2655 : # endif /* MAP_PEDANTIC */
2656 0 : }
2657 :
2658 1404 : return FD_MAP_ERR_KEY;
2659 1404 : }
2660 :
2661 : MAP_STATIC int
2662 : MAP_(iter_lock)( MAP_(t) * join,
2663 : ulong * lock_seq,
2664 : ulong lock_cnt,
2665 0 : int flags ) {
2666 0 : if( FD_UNLIKELY( !lock_cnt ) ) return FD_MAP_SUCCESS; /* nothing to do */
2667 0 : if( FD_UNLIKELY( (!join) | (!lock_seq) ) ) return FD_MAP_ERR_INVAL;
2668 :
2669 0 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2670 :
2671 0 : MAP_(shmem_t) * map = join->map;
2672 :
2673 0 : ulong chain_cnt = map->chain_cnt;
2674 0 : if( FD_UNLIKELY( lock_cnt>chain_cnt ) ) return FD_MAP_ERR_INVAL;
2675 :
2676 0 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2677 :
2678 0 : int err;
2679 :
2680 0 : ulong backoff = 1UL<<32; /* in [1,2^16)*2^32 */
2681 0 : ulong backoff_seed = ((ulong)(uint)flags)>>2; /* 0 usually fine */
2682 0 : ulong lock_idx = 0UL;
2683 0 : ulong locked_cnt = 0UL;
2684 0 : for(;;) {
2685 :
2686 0 : err = FD_MAP_SUCCESS;
2687 :
2688 : /* At this point, we've acquired locks [0,locked_cnt), we need to
2689 : acquire locks [locked_cnt,lock_cnt), [locked_cnt,lock_cnt) is non
2690 : empty and i is in [locked_cnt,lock_cnt). Try to acquire lock
2691 : lock_idx this iteration. */
2692 :
2693 0 : ulong chain_idx = lock_seq[ lock_idx ];
2694 :
2695 0 : if( FD_UNLIKELY( chain_idx>=chain_cnt ) ) { /* optimize for valid lock_seq */
2696 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2697 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2698 0 : locked_cnt = 0UL;
2699 0 : err = FD_MAP_ERR_AGAIN;
2700 0 : break;
2701 0 : }
2702 :
2703 0 : MAP_CRIT( chain + chain_idx, 0 ) {
2704 :
2705 : /* At this point, we got the lock. Swap lock at locked_cnt and
2706 : lock_idx and increment locked_cnt to move lock_idx to the
2707 : locked set as the most recently acquired lock. Since we
2708 : increment lock_idx below, when locked_cnt<lock_idx (i.e. we had
2709 : contention for lock locked_cnt recently), this will move the
2710 : next attempt to lock locked_cnt as far as possible from now of
2711 : the remaining locks to acquire. When locked_cnt==lock_idx,
2712 : this is a branchless no-op (and the increment of lock_idx below
2713 : will guarantee lock_idx will be at least locked_cnt next
2714 : iteration, preserving the invariant that lock_idx is in
2715 : [locked_cnt,lock_cnt) on the next iteration if there is one. */
2716 :
2717 0 : ulong chain_idx_tmp = lock_seq[ locked_cnt ];
2718 0 : lock_seq[ lock_idx ] = chain_idx_tmp;
2719 0 : lock_seq[ locked_cnt ] = chain_idx;
2720 0 : locked_cnt++;
2721 :
2722 0 : retain_lock = 1;
2723 :
2724 0 : } MAP_CRIT_BLOCKED {
2725 :
2726 : /* We failed to get lock lock_idx. To avoid deadlock with the
2727 : thread that has this lock and is trying to get a lock we
2728 : already have, we unlock the chains we've already locked (note
2729 : that we need to unlock here in non-blocking operation too).
2730 : Quick experiments in extreme contention scenarios found more
2731 : incremental approaches in blocking operation could take an
2732 : excessively long time to resolve so we bulk unlock. */
2733 :
2734 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2735 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2736 0 : locked_cnt = 0UL;
2737 :
2738 0 : err = FD_MAP_ERR_AGAIN;
2739 :
2740 0 : } MAP_CRIT_END;
2741 :
2742 0 : if( FD_UNLIKELY( (locked_cnt==lock_cnt ) | /* all locks acquired */
2743 0 : ((!!err) & non_blocking) ) ) break; /* or hit contention and are non-blocking */
2744 :
2745 : /* Move to the next lock. Everytime we wrap around, we hit
2746 : contention since the last wrap / iter start. We do a random
2747 : exponential backoff with saturation on wrapping to minimize
2748 : contention with other threads hitting these locks. Normalizing
2749 : out fixed point scalings baked into the below, we spin pause a
2750 : uniform IID random number of times in [0,unlocked_cnt*backoff]
2751 : where backoff is 1 on the first wrap and increases by ~30% each
2752 : time to a maximum of 2^16 (i.e. hundreds microseconds per
2753 : remaining lock for typical CPU speeds and spin pause delays at
2754 : maximum backoff). */
2755 :
2756 0 : lock_idx++;
2757 0 : if( FD_UNLIKELY( lock_idx==lock_cnt ) ) { /* optimize for lots of locks */
2758 0 : lock_idx = locked_cnt;
2759 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt-locked_cnt, (1UL<<16)-1UL )*backoff) >> 16, (1UL<<32)-1UL );
2760 0 : backoff = fd_ulong_min( backoff + (backoff>>2) + (backoff>>4), (1UL<<48)-1UL );
2761 0 : MAP_(backoff)( scale, backoff_seed );
2762 0 : }
2763 0 : }
2764 :
2765 0 : return err;
2766 0 : }
2767 :
2768 : MAP_STATIC void
2769 : MAP_(iter_unlock)( MAP_(t) * join,
2770 : ulong const * lock_seq,
2771 0 : ulong lock_cnt ) {
2772 0 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2773 :
2774 0 : FD_COMPILER_MFENCE();
2775 0 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2776 0 : chain[ lock_seq[ lock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2777 0 : FD_COMPILER_MFENCE();
2778 0 : }
2779 :
2780 : MAP_STATIC void
2781 0 : MAP_(reset)( MAP_(t) * join ) {
2782 0 : MAP_(shmem_t) * map = join->map;
2783 :
2784 0 : ulong chain_cnt = map->chain_cnt;
2785 0 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
2786 :
2787 0 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2788 0 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2789 0 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2790 0 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( version+2UL, 0UL );
2791 0 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
2792 0 : }
2793 0 : }
2794 :
2795 : MAP_STATIC int
2796 0 : MAP_(verify)( MAP_(t) const * join ) {
2797 :
2798 0 : # define MAP_TEST(c) do { \
2799 0 : if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_MAP_ERR_INVAL; } \
2800 0 : } while(0)
2801 :
2802 : /* Validate join */
2803 :
2804 0 : MAP_TEST( join );
2805 0 : MAP_TEST( fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) );
2806 :
2807 0 : MAP_(shmem_t) const * map = join->map;
2808 0 : MAP_ELE_T const * ele = join->ele;
2809 0 : ulong ele_max = join->ele_max;
2810 :
2811 0 : MAP_TEST( map );
2812 0 : MAP_TEST( fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) );
2813 :
2814 0 : MAP_TEST( (!!ele) | (!ele_max) );
2815 0 : MAP_TEST( fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) );
2816 :
2817 0 : MAP_TEST( ele_max<=MAP_(ele_max_max)() );
2818 :
2819 : /* Validate map metadata */
2820 :
2821 0 : ulong magic = map->magic;
2822 0 : ulong seed = map->seed;
2823 0 : ulong chain_cnt = map->chain_cnt;
2824 :
2825 0 : MAP_TEST( magic==MAP_MAGIC );
2826 : /* seed is arbitrary */
2827 0 : MAP_TEST( fd_ulong_is_pow2( chain_cnt ) );
2828 0 : MAP_TEST( chain_cnt<=MAP_(chain_max)() );
2829 :
2830 0 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, 0UL );
2831 :
2832 : /* Validate the map chains */
2833 :
2834 0 : ulong unmapped_ele_cnt = ele_max;
2835 0 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2836 :
2837 : /* Validate the chain length */
2838 :
2839 0 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2840 :
2841 0 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2842 0 : MAP_TEST( ele_cnt<=unmapped_ele_cnt );
2843 0 : unmapped_ele_cnt -= ele_cnt;
2844 :
2845 : /* Validate chain linkage, element membership and element uniqueness */
2846 :
2847 0 : ulong head_idx = MAP_(private_idx)( chain[ chain_idx ].head_cidx );
2848 0 : ulong cur_idx = head_idx;
2849 0 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2850 0 : MAP_TEST( cur_idx<ele_max ); /* In element store */
2851 :
2852 0 : MAP_KEY_T const * key = &ele[ cur_idx ].MAP_KEY;
2853 :
2854 0 : ulong memo = MAP_(key_hash)( key, seed );
2855 0 : ulong ele_chain_idx = memo & (chain_cnt-1UL);
2856 0 : MAP_TEST( ele_chain_idx==chain_idx ); /* On correct chain */
2857 : # if MAP_MEMOIZE
2858 0 : MAP_TEST( ele[ cur_idx ].MAP_MEMO==memo );
2859 0 : # endif
2860 :
2861 : /* Note that we've already validated linkage from head_idx to
2862 : cur_idx so pointer chasing here is safe. */
2863 :
2864 0 : ulong prv_idx = head_idx;
2865 0 : while( prv_idx!=cur_idx ) {
2866 0 : MAP_TEST( !MAP_(key_eq)( &ele[ prv_idx ].MAP_KEY, key ) ); /* Unique */
2867 0 : prv_idx = MAP_(private_idx)( ele[ prv_idx ].MAP_NEXT );
2868 0 : }
2869 :
2870 0 : cur_idx = MAP_(private_idx)( ele[ cur_idx ].MAP_NEXT );
2871 0 : }
2872 :
2873 0 : MAP_TEST( MAP_(private_idx_is_null)( cur_idx ) );
2874 0 : }
2875 :
2876 : /* At this point, we know the sum of the chain lengths do not exceed
2877 : the size of the element store, each chain is of their stated
2878 : length, each chain element is in element store, and that every
2879 : element on a chain belongs on that chain (which precludes the
2880 : possibility of two chains merging into one) and that every element
2881 : on a chain is unique (which implies unique among all chains since
2882 : elements with each key maps to a single chain).
2883 :
2884 : That is, join is a current local join to a valid shared mapping of
2885 : unique keys to unique elements in the element store.
2886 :
2887 : We don't know anything about unmapped elements in the element store
2888 : and cannot do any verification of them (here be dragons). But
2889 : that's kinda the point ... what's in the unmapped elements depends
2890 : on how the application is managing those. */
2891 :
2892 0 : # undef MAP_TEST
2893 :
2894 0 : return FD_MAP_SUCCESS;
2895 0 : }
2896 :
2897 : MAP_STATIC char const *
2898 0 : MAP_(strerror)( int err ) {
2899 0 : switch( err ) {
2900 0 : case FD_MAP_SUCCESS: return "success";
2901 0 : case FD_MAP_ERR_INVAL: return "bad input";
2902 0 : case FD_MAP_ERR_AGAIN: return "try again";
2903 0 : case FD_MAP_ERR_CORRUPT: return "corruption detected";
2904 0 : case FD_MAP_ERR_KEY: return "key not found";
2905 0 : default: break;
2906 0 : }
2907 0 : return "unknown";
2908 0 : }
2909 :
2910 : #undef MAP_CRIT_END
2911 : #undef MAP_CRIT_BLOCKED
2912 : #undef MAP_CRIT
2913 :
2914 : #endif
2915 :
2916 : #undef MAP_
2917 : #undef MAP_STATIC
2918 : #undef MAP_VER_WIDTH
2919 : #undef MAP_PEDANTIC
2920 : #undef MAP_IMPL_STYLE
2921 : #undef MAP_MAGIC
2922 : #undef MAP_ALIGN
2923 : #undef MAP_CUSTOM_CHAIN
2924 : #undef MAP_CNT_WIDTH
2925 : #undef MAP_KEY_EQ_IS_SLOW
2926 : #undef MAP_MEMO
2927 : #undef MAP_MEMOIZE
2928 : #undef MAP_KEY_HASH
2929 : #undef MAP_KEY_EQ
2930 : #undef MAP_NEXT
2931 : #undef MAP_IDX_T
2932 : #undef MAP_KEY
2933 : #undef MAP_KEY_T
2934 : #undef MAP_ELE_T
2935 : #undef MAP_NAME
|