/src/rocksdb/trace_replay/io_tracer.h
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
6 | | #pragma once |
7 | | |
8 | | #include <atomic> |
9 | | #include <fstream> |
10 | | |
11 | | #include "monitoring/instrumented_mutex.h" |
12 | | #include "port/lang.h" |
13 | | #include "rocksdb/file_system.h" |
14 | | #include "rocksdb/options.h" |
15 | | #include "rocksdb/trace_record.h" |
16 | | #include "trace_replay/trace_replay.h" |
17 | | |
18 | | namespace ROCKSDB_NAMESPACE { |
19 | | class SystemClock; |
20 | | class TraceReader; |
21 | | class TraceWriter; |
22 | | |
23 | | /* To log new data in the trace record for specified operations, do the |
24 | | following: |
25 | | 1. Add the new data to IOTraceOp (say kIONewData = 3). |
26 | | 2. Log it in IOTraceWriter::WriteIOOp, and read it in |
27 | | IOTraceReader::ReadIOOp and |
28 | | IOTraceRecordParser::PrintHumanReadableIOTraceRecord in the switch case. |
29 | | 3. In the FileSystemTracer APIs that should log this data, update |
30 | | io_op_data |= (1 << IOTraceOp::kIONewData). |
31 | | */ |
32 | | enum IOTraceOp : char { |
33 | | // The value of each enumerator is the bit position of the corresponding |
34 | | // flag in IOTraceRecord.io_op_data. |
35 | | kIOFileSize = 0, |
36 | | kIOLen = 1, |
37 | | kIOOffset = 2, |
38 | | }; |
39 | | |
40 | | struct IOTraceRecord { /* Data describing one traced IO operation. */ |
41 | | // Fields required for every access. |
42 | | uint64_t access_timestamp = 0; |
43 | | TraceType trace_type = TraceType::kTraceMax; |
44 | | // Bitmask: each bit of io_op_data tells whether the corresponding IOTraceOp |
45 | | // field is added to the trace. For example, if the bit at position 1 is set, |
46 | | // then IOTraceOp::kIOLen (the length) is logged in the record. |
47 | | uint64_t io_op_data = 0; |
48 | | std::string file_operation; |
49 | | uint64_t latency = 0; |
50 | | std::string io_status; |
51 | | // Stores only the file name, not the full path. |
52 | | std::string file_name; |
53 | | |
54 | | // Fields added to the record depending on the IO operation. |
55 | | uint64_t len = 0; |
56 | | uint64_t offset = 0; |
57 | | uint64_t file_size = 0; |
58 | | |
59 | | // Additional information passed in through IODebugContext. |
60 | | uint64_t trace_data = 0; |
61 | | std::string request_id; |
62 | | |
63 | 0 | IOTraceRecord() {} |
64 | | |
65 | | IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, |
66 | | const uint64_t& _io_op_data, const std::string& _file_operation, |
67 | | const uint64_t& _latency, const std::string& _io_status, |
68 | | const std::string& _file_name, const uint64_t& _file_size = 0) |
69 | 0 | : access_timestamp(_access_timestamp), |
70 | 0 | trace_type(_trace_type), |
71 | 0 | io_op_data(_io_op_data), |
72 | 0 | file_operation(_file_operation), |
73 | 0 | latency(_latency), |
74 | 0 | io_status(_io_status), |
75 | 0 | file_name(_file_name), |
76 | 0 | file_size(_file_size) {} /* len and offset keep their default of 0 */ |
77 | | |
78 | | IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, |
79 | | const uint64_t& _io_op_data, const std::string& _file_operation, |
80 | | const uint64_t& _latency, const std::string& _io_status, |
81 | | const std::string& _file_name, const uint64_t& _len, |
82 | | const uint64_t& _offset) |
83 | 0 | : access_timestamp(_access_timestamp), |
84 | 0 | trace_type(_trace_type), |
85 | 0 | io_op_data(_io_op_data), |
86 | 0 | file_operation(_file_operation), |
87 | 0 | latency(_latency), |
88 | 0 | io_status(_io_status), |
89 | 0 | file_name(_file_name), |
90 | 0 | len(_len), |
91 | 0 | offset(_offset) {} /* file_size keeps its default of 0 */ |
92 | | }; |
93 | | |
94 | | struct IOTraceHeader { /* Trace header metadata; passed by pointer to IOTraceReader::ReadHeader. */ |
95 | | uint64_t start_time; /* NOTE: none of these members has a default initializer. */ |
96 | | uint32_t rocksdb_major_version; |
97 | | uint32_t rocksdb_minor_version; |
98 | | }; |
99 | | |
100 | | // IOTraceWriter writes each IO operation as a single trace. Every trace has a |
101 | | // timestamp and a type, followed by the trace payload. |
102 | | class IOTraceWriter { |
103 | | public: |
104 | | IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options, |
105 | | std::unique_ptr<TraceWriter>&& trace_writer); |
106 | 0 | ~IOTraceWriter() = default; |
107 | | // Neither copyable nor movable. |
108 | | IOTraceWriter(const IOTraceWriter&) = delete; |
109 | | IOTraceWriter& operator=(const IOTraceWriter&) = delete; |
110 | | IOTraceWriter(IOTraceWriter&&) = delete; |
111 | | IOTraceWriter& operator=(IOTraceWriter&&) = delete; |
112 | | |
113 | | Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg); |
114 | | |
115 | | // Writes a trace header at the beginning, typically when a trace is started, |
116 | | // with some metadata such as a magic number and the RocksDB version. |
117 | | Status WriteHeader(); |
118 | | |
119 | | private: |
120 | | SystemClock* clock_; |
121 | | TraceOptions trace_options_; |
122 | | std::unique_ptr<TraceWriter> trace_writer_; |
123 | | }; |
124 | | |
125 | | // IOTraceReader reads the trace file generated by IOTraceWriter. |
126 | | class IOTraceReader { |
127 | | public: |
128 | | explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader); |
129 | | ~IOTraceReader() = default; |
130 | | // Neither copyable nor movable. |
131 | | IOTraceReader(const IOTraceReader&) = delete; |
132 | | IOTraceReader& operator=(const IOTraceReader&) = delete; |
133 | | IOTraceReader(IOTraceReader&&) = delete; |
134 | | IOTraceReader& operator=(IOTraceReader&&) = delete; |
135 | | |
136 | | Status ReadHeader(IOTraceHeader* header); |
137 | | |
138 | | Status ReadIOOp(IOTraceRecord* record); |
139 | | |
140 | | private: |
141 | | std::unique_ptr<TraceReader> trace_reader_; |
142 | | }; |
143 | | |
144 | | // An IO tracer. It uses IOTraceWriter to write the access records to the |
145 | | // trace file. |
146 | | class IOTracer { |
147 | | public: |
148 | | IOTracer(); |
149 | | ~IOTracer(); |
150 | | // Neither copyable nor movable. |
151 | | IOTracer(const IOTracer&) = delete; |
152 | | IOTracer& operator=(const IOTracer&) = delete; |
153 | | IOTracer(IOTracer&&) = delete; |
154 | | IOTracer& operator=(IOTracer&&) = delete; |
155 | | |
156 | | // no_sanitize is added because of tracing_enabled. writer_ is protected by a |
157 | | // mutex, so even if the user calls Start/EndIOTrace and tracing_enabled has |
158 | | // not been updated in the meantime, WriteIOOp still checks writer_ under the |
159 | | // mutex and ignores the operation if writer_ is null. So it is OK if |
160 | | // tracing_enabled shows a stale value. |
161 | | |
162 | | // Start writing IO operations to the trace_writer. |
163 | | TSAN_SUPPRESSION Status |
164 | | StartIOTrace(SystemClock* clock, const TraceOptions& trace_options, |
165 | | std::unique_ptr<TraceWriter>&& trace_writer); |
166 | | |
167 | | // Stop writing IO operations to the trace_writer. |
168 | | TSAN_SUPPRESSION void EndIOTrace(); |
169 | | |
170 | 6.50M | TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; } |
171 | | |
172 | | void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg); |
173 | | |
174 | | private: |
175 | | TraceOptions trace_options_; |
176 | | // Mutex that protects writer_. |
177 | | InstrumentedMutex trace_writer_mutex_; |
178 | | std::atomic<IOTraceWriter*> writer_; |
179 | | // tracing_enabled is kept so that is_tracing_enabled() can avoid the costly |
180 | | // operation of checking whether the atomic variable 'writer_' is nullptr. |
181 | | // is_tracing_enabled() is invoked many times by the FileSystem classes. |
182 | | bool tracing_enabled; /* Plain, non-atomic bool with no default member initializer. */ |
183 | | }; |
184 | | |
185 | | } // namespace ROCKSDB_NAMESPACE |