Line data Source code
1 : /* fd_vinyl_io_wd.h is a vinyl_io driver that does async O_DIRECT writes 2 : via the snapwr tile. Implements a fast way to create a bstream on 3 : Linux (DMA under the hood). 4 : 5 : Internally manages a pool of DMA/LBA friendly blocks (i.e. 4 KiB 6 : aligned, O(16 MiB) size). Blocks have either state IDLE, APPEND 7 : (currently being written to), or IOWAIT (waiting for snapwr 8 : completion). */ 9 : 10 : #include "../../../vinyl/io/fd_vinyl_io.h" 11 : #include <limits.h> /* PATH_MAX */ 12 : 13 : /* wd_buf describes an O_DIRECT append buf */ 14 : 15 : struct wd_buf; 16 : typedef struct wd_buf wd_buf_t; 17 : 18 : struct wd_buf { 19 : uchar * buf; /* pointer into dcache */ 20 : uint state; /* WD_BUF_* */ 21 : wd_buf_t * next; /* next ele in linked list */ 22 : ulong io_seq; /* mcache request sequence number */ 23 : ulong bstream_seq; /* APPEND=bstream seq of first block */ 24 : /* IOWAIT=bstream seq after buffer is fully written */ 25 : }; 26 : 27 : /* WD_BUF_* give append buf states */ 28 : 29 0 : #define WD_BUF_IDLE 1U 30 0 : #define WD_BUF_APPEND 2U 31 0 : #define WD_BUF_IOWAIT 3U 32 : 33 : #define WD_WR_FSEQ_CNT_MAX (32UL) 34 : 35 : /* fd_vinyl_io_wd implements the fd_vinyl_io_t interface */ 36 : 37 : struct fd_vinyl_io_wd { 38 : fd_vinyl_io_t base[1]; 39 : ulong dev_base; 40 : ulong dev_sz; /* Block store byte size (BLOCK_SZ multiple) */ 41 : 42 : /* Buffer linked lists by state */ 43 : wd_buf_t * buf_idle; /* free stack */ 44 : wd_buf_t * buf_append; /* current wip block */ 45 : wd_buf_t * buf_iowait_head; /* least recently enqueued (seq increasing) */ 46 : wd_buf_t * buf_iowait_tail; /* most recently enqueued */ 47 : 48 : /* Work queue (snapwr) */ 49 : fd_frag_meta_t * wr_mcache; /* metadata ring */ 50 : ulong wr_seq; /* next metadata seq no */ 51 : ulong wr_seqack; /* next expected ACK seq */ 52 : ulong wr_depth; /* metadata ring depth */ 53 : uchar * wr_base; /* base pointer for data cache */ 54 : uchar * wr_chunk0; /* [wr_chunk0,wr_chunk1) is the data cache data region */ 55 : uchar * wr_chunk1; 56 : ulong const * wr_fseq[WD_WR_FSEQ_CNT_MAX]; /* completion notifications */ 57 : ulong wr_fseq_cnt;/* completion notifications count */ 58 : ulong wr_mtu; /* max block byte size */ 59 : 60 : /* Vinyl instance name (path) */ 61 : char bstream_path[PATH_MAX]; 62 : }; 63 : 64 : typedef struct fd_vinyl_io_wd fd_vinyl_io_wd_t; 65 : 66 : 67 : /* fd_vinyl_io_wd_{align,footprint} specify the alignment and footprint 68 : needed for a bstream O_DIRECT writer with block_depth max blocks 69 : inflight. align will be a reasonable power-of-2 and footprint will 70 : be a multiple of align. Returns 0 for an invalid block_depth. */ 71 : 72 : ulong 73 : fd_vinyl_io_wd_align( void ); 74 : 75 : ulong 76 : fd_vinyl_io_wd_footprint( ulong block_depth ); 77 : 78 : /* fd_vinyl_io_wd_init creates a bstream fast append backend. lmem 79 : points to a local memory region with suitable alignment and footprint 80 : to hold bstream's state. io_seed is the bstream's data integrity 81 : hashing seed. 82 : 83 : block_queue is an mcache (request queue) used to submit write 84 : requests to a snapwr. fd_mcache_depth(block_queue)==block_depth. 85 : block_dcache is a dcache (data cache) sized to block_depth*block_mtu 86 : data_sz. block_mtu is a multiple of FD_VINYL_BSTREAM_BLOCK_SZ and 87 : determines the largest O_DIRECT write operation (typically between 2 88 : to 64 MiB). block_fseq points to the snapwr tile's fseq(s) (used 89 : to report write completions). 90 : 91 : bstream_path is considered to be the vinyl instance's name. 92 : 93 : Returns a handle to the bstream on success (has ownership of lmem and 94 : dev_fd, ownership returned on fini) and NULL on failure (logs 95 : details, no ownership changed). */ 96 : 97 : fd_vinyl_io_t * 98 : fd_vinyl_io_wd_init( void * lmem, 99 : ulong dev_sz, 100 : ulong io_seed, 101 : fd_frag_meta_t * block_mcache, 102 : uchar * block_dcache, 103 : ulong const ** block_fseq, 104 : ulong block_fseq_cnt, 105 : ulong block_mtu, 106 : char const * bstream_path ); 107 : 108 : /* API restrictions: 109 : 110 : - Any method is unsupported (crash application if called) unless 111 : otherwise specified 112 : - Supported methods: append, commit, alloc, fini 113 : - In-place append not supported. All appends must use a buffer 114 : sourced from alloc as the input buffer. 115 : - append, commit, alloc require FD_VINYL_IO_FLAG_BLOCKING to be unset */ 116 : 117 : extern fd_vinyl_io_impl_t fd_vinyl_io_wd_impl; 118 : 119 : /* fd_vinyl_io_wd_alloc implements fd_vinyl_io_alloc. */ 120 : 121 : void * 122 : fd_vinyl_io_wd_alloc( fd_vinyl_io_t * io, 123 : ulong sz, 124 : int flags ); 125 : 126 : /* fd_vinyl_io_wd_busy returns 1 if there is at least one buffer in use 127 : (either APPEND or IOWAIT state). Returns 0 if all buffers are IDLE. */ 128 : 129 : int 130 : fd_vinyl_io_wd_busy( fd_vinyl_io_t * io ); 131 : 132 : /* fd_vinyl_io_wd_ctrl sends a control message to the snapwr tile. 133 : Blocks until the message is acknowledged. */ 134 : 135 : void 136 : fd_vinyl_io_wd_ctrl( fd_vinyl_io_t * io, 137 : ulong ctl, 138 : ulong sig ); 139 : 140 : /* fd_viny_io_wd_alloc_fast is an optimistic version of vinyl_io->alloc. 141 : If it fails (returns NULL), the caller should fall back to calling 142 : fd_vinyl_io_alloc normally. */ 143 : 144 : static inline void * 145 : fd_vinyl_io_wd_alloc_fast( fd_vinyl_io_t * io, 146 0 : ulong sz ) { 147 0 : fd_vinyl_io_wd_t * wd = (fd_vinyl_io_wd_t *)io; /* Note: io must be non-NULL to have even been called */ 148 0 : 149 0 : wd_buf_t * buf = wd->buf_append; 150 0 : if( FD_UNLIKELY( !buf ) ) return NULL; 151 0 : 152 0 : ulong buf_used = wd->base->seq_future - buf->bstream_seq; 153 0 : ulong buf_free = wd->wr_mtu - buf_used; 154 0 : if( FD_UNLIKELY( sz>buf_free ) ) return NULL; 155 0 : 156 0 : return buf->buf + buf_used; 157 0 : }