/src/openvswitch/lib/reconnect.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at: |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <config.h> |
18 | | #include "reconnect.h" |
19 | | |
20 | | #include <stdlib.h> |
21 | | |
22 | | #include "openvswitch/poll-loop.h" |
23 | | #include "util.h" |
24 | | #include "openvswitch/vlog.h" |
25 | | |
26 | | VLOG_DEFINE_THIS_MODULE(reconnect); |
27 | | |
/* X-macro table of the FSM states.  Each entry has the form
 * STATE(NAME, VALUE), where VALUE is a distinct single bit so that membership
 * in a set of states can be tested with one mask, e.g.
 * 'state & (S_ACTIVE | S_IDLE)'.  Bit 2 (0x04) is not assigned to any
 * state. */
#define STATES                      \
    STATE(VOID,       0x01)         \
    STATE(BACKOFF,    0x02)         \
    STATE(CONNECTING, 0x08)         \
    STATE(ACTIVE,     0x10)         \
    STATE(IDLE,       0x20)         \
    STATE(RECONNECT,  0x40)         \
    STATE(LISTENING,  0x80)

/* FSM states: STATES expanded into enumerators named S_<NAME>. */
enum state {
#define STATE(NAME, VALUE) S_##NAME = VALUE,
    STATES
#undef STATE
};
41 | | |
42 | | static bool |
43 | | is_connected_state(enum state state) |
44 | 0 | { |
45 | 0 | return (state & (S_ACTIVE | S_IDLE)) != 0; |
46 | 0 | } |
47 | | |
48 | | struct reconnect { |
49 | | /* Configuration. */ |
50 | | char *name; |
51 | | int min_backoff; |
52 | | int max_backoff; |
53 | | int probe_interval; |
54 | | bool passive; |
55 | | enum vlog_level info; /* Used for informational messages. */ |
56 | | |
57 | | /* State. */ |
58 | | enum state state; |
59 | | long long int state_entered; |
60 | | int backoff; |
61 | | long long int last_activity; |
62 | | long long int last_connected; |
63 | | long long int last_disconnected; |
64 | | long long int last_receive_attempt; |
65 | | unsigned int max_tries; |
66 | | unsigned int backoff_free_tries; |
67 | | |
68 | | /* These values are simply for statistics reporting, not otherwise used |
69 | | * directly by anything internal. */ |
70 | | long long int creation_time; |
71 | | unsigned int n_attempted_connections, n_successful_connections; |
72 | | unsigned int total_connected_duration; |
73 | | unsigned int seqno; |
74 | | }; |
75 | | |
/* File-local helpers that are defined after their first use. */
static void reconnect_transition__(struct reconnect *fsm, long long int now,
                                   enum state state);
static long long int reconnect_deadline__(const struct reconnect *fsm,
                                          long long int now);
static bool reconnect_may_retry(struct reconnect *fsm);
81 | | |
82 | | static const char * |
83 | | reconnect_state_name__(enum state state) |
84 | 0 | { |
85 | 0 | switch (state) { |
86 | 0 | #define STATE(NAME, VALUE) case S_##NAME: return #NAME; |
87 | 0 | STATES |
88 | 0 | #undef STATE |
89 | 0 | } |
90 | 0 | return "***ERROR***"; |
91 | 0 | } |
92 | | |
93 | | /* Creates and returns a new reconnect FSM with default settings. The FSM is |
94 | | * initially disabled. The caller will likely want to call reconnect_enable() |
95 | | * and reconnect_set_name() on the returned object. */ |
96 | | struct reconnect * |
97 | | reconnect_create(long long int now) |
98 | 0 | { |
99 | 0 | struct reconnect *fsm = xzalloc(sizeof *fsm); |
100 | |
|
101 | 0 | fsm->name = xstrdup("void"); |
102 | 0 | fsm->min_backoff = RECONNECT_DEFAULT_MIN_BACKOFF; |
103 | 0 | fsm->max_backoff = RECONNECT_DEFAULT_MAX_BACKOFF; |
104 | 0 | fsm->probe_interval = RECONNECT_DEFAULT_PROBE_INTERVAL; |
105 | 0 | fsm->passive = false; |
106 | 0 | fsm->info = VLL_INFO; |
107 | |
|
108 | 0 | fsm->state = S_VOID; |
109 | 0 | fsm->state_entered = now; |
110 | 0 | fsm->backoff = 0; |
111 | 0 | fsm->last_activity = now; |
112 | 0 | fsm->last_connected = LLONG_MAX; |
113 | 0 | fsm->last_disconnected = LLONG_MAX; |
114 | 0 | fsm->last_receive_attempt = now; |
115 | 0 | fsm->max_tries = UINT_MAX; |
116 | 0 | fsm->creation_time = now; |
117 | |
|
118 | 0 | return fsm; |
119 | 0 | } |
120 | | |
121 | | /* Frees 'fsm'. */ |
122 | | void |
123 | | reconnect_destroy(struct reconnect *fsm) |
124 | 0 | { |
125 | 0 | if (fsm) { |
126 | 0 | free(fsm->name); |
127 | 0 | free(fsm); |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | | /* If 'quiet' is true, 'fsm' will log informational messages at level VLL_DBG, |
132 | | * by default keeping them out of log files. This is appropriate if the |
133 | | * connection is one that is expected to be short-lived, so that the log |
134 | | * messages are merely distracting. |
135 | | * |
136 | | * If 'quiet' is false, 'fsm' logs informational messages at level VLL_INFO. |
137 | | * This is the default. |
138 | | * |
139 | | * This setting has no effect on the log level of debugging, warning, or error |
140 | | * messages. */ |
141 | | void |
142 | | reconnect_set_quiet(struct reconnect *fsm, bool quiet) |
143 | 0 | { |
144 | 0 | fsm->info = quiet ? VLL_DBG : VLL_INFO; |
145 | 0 | } |
146 | | |
147 | | /* Returns 'fsm''s name. */ |
148 | | const char * |
149 | | reconnect_get_name(const struct reconnect *fsm) |
150 | 0 | { |
151 | 0 | return fsm->name; |
152 | 0 | } |
153 | | |
154 | | /* Sets 'fsm''s name to 'name'. If 'name' is null, then "void" is used |
155 | | * instead. |
156 | | * |
157 | | * The name set for 'fsm' is used in log messages. */ |
158 | | void |
159 | | reconnect_set_name(struct reconnect *fsm, const char *name) |
160 | 0 | { |
161 | 0 | free(fsm->name); |
162 | 0 | fsm->name = xstrdup(name ? name : "void"); |
163 | 0 | } |
164 | | |
165 | | /* Return the minimum number of milliseconds to back off between consecutive |
166 | | * connection attempts. The default is RECONNECT_DEFAULT_MIN_BACKOFF. */ |
167 | | int |
168 | | reconnect_get_min_backoff(const struct reconnect *fsm) |
169 | 0 | { |
170 | 0 | return fsm->min_backoff; |
171 | 0 | } |
172 | | |
173 | | /* Return the maximum number of milliseconds to back off between consecutive |
174 | | * connection attempts. The default is RECONNECT_DEFAULT_MAX_BACKOFF. */ |
175 | | int |
176 | | reconnect_get_max_backoff(const struct reconnect *fsm) |
177 | 0 | { |
178 | 0 | return fsm->max_backoff; |
179 | 0 | } |
180 | | |
181 | | /* Returns the "probe interval" for 'fsm' in milliseconds. If this is zero, it |
182 | | * disables the connection keepalive feature. If it is nonzero, then if the |
183 | | * interval passes while 'fsm' is connected and without reconnect_activity() |
184 | | * being called for 'fsm', reconnect_run() returns RECONNECT_PROBE. If the |
185 | | * interval passes again without reconnect_activity() being called, |
186 | | * reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. */ |
187 | | int |
188 | | reconnect_get_probe_interval(const struct reconnect *fsm) |
189 | 0 | { |
190 | 0 | return fsm->probe_interval; |
191 | 0 | } |
192 | | |
193 | | /* Limits the maximum number of times that 'fsm' will ask the client to try to |
194 | | * reconnect to 'max_tries'. UINT_MAX (the default) means an unlimited number |
195 | | * of tries. |
196 | | * |
197 | | * After the number of tries has expired, the 'fsm' will disable itself |
198 | | * instead of backing off and retrying. */ |
199 | | void |
200 | | reconnect_set_max_tries(struct reconnect *fsm, unsigned int max_tries) |
201 | 0 | { |
202 | 0 | fsm->max_tries = max_tries; |
203 | 0 | } |
204 | | |
205 | | /* Returns the current remaining number of connection attempts, UINT_MAX if |
206 | | * the number is unlimited. */ |
207 | | unsigned int |
208 | | reconnect_get_max_tries(struct reconnect *fsm) |
209 | 0 | { |
210 | 0 | return fsm->max_tries; |
211 | 0 | } |
212 | | |
213 | | /* Sets the number of connection attempts that will be made without backoff to |
214 | | * 'backoff_free_tries'. Values 0 and 1 both represent a single attempt. */ |
215 | | void |
216 | | reconnect_set_backoff_free_tries(struct reconnect *fsm, |
217 | | unsigned int backoff_free_tries) |
218 | 0 | { |
219 | 0 | fsm->backoff_free_tries = backoff_free_tries; |
220 | 0 | } |
221 | | |
222 | | /* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum |
223 | | * number of milliseconds, and 'max_backoff' is the maximum, between connection |
224 | | * attempts. The current backoff is also the duration that 'fsm' is willing to |
225 | | * wait for a given connection to succeed or fail. |
226 | | * |
227 | | * 'min_backoff' must be at least 1000, and 'max_backoff' must be greater than |
228 | | * or equal to 'min_backoff'. |
229 | | * |
230 | | * Pass 0 for 'min_backoff' or 'max_backoff' or both to use the defaults. */ |
231 | | void |
232 | | reconnect_set_backoff(struct reconnect *fsm, int min_backoff, int max_backoff) |
233 | 0 | { |
234 | 0 | fsm->min_backoff = MAX(min_backoff, 1000); |
235 | 0 | fsm->max_backoff = (max_backoff |
236 | 0 | ? MAX(max_backoff, 1000) |
237 | 0 | : RECONNECT_DEFAULT_MAX_BACKOFF); |
238 | 0 | if (fsm->min_backoff > fsm->max_backoff) { |
239 | 0 | fsm->max_backoff = fsm->min_backoff; |
240 | 0 | } |
241 | |
|
242 | 0 | if (fsm->state == S_BACKOFF && fsm->backoff > max_backoff) { |
243 | 0 | fsm->backoff = max_backoff; |
244 | 0 | } |
245 | 0 | } |
246 | | |
247 | | /* Sets the "probe interval" for 'fsm' to 'probe_interval', in milliseconds. |
248 | | * If this is zero, it disables the connection keepalive feature. If it is |
249 | | * nonzero, then if the interval passes while 'fsm' is connected and without |
250 | | * reconnect_activity() being called for 'fsm', reconnect_run() returns |
251 | | * RECONNECT_PROBE. If the interval passes again without reconnect_activity() |
252 | | * being called, reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. |
253 | | * |
254 | | * If 'probe_interval' is nonzero, then it will be forced to a value of at |
255 | | * least 1000 ms. */ |
256 | | void |
257 | | reconnect_set_probe_interval(struct reconnect *fsm, int probe_interval) |
258 | 0 | { |
259 | 0 | fsm->probe_interval = probe_interval ? MAX(1000, probe_interval) : 0; |
260 | 0 | } |
261 | | |
262 | | /* Returns true if 'fsm' is in passive mode, false if 'fsm' is in active mode |
263 | | * (the default). */ |
264 | | bool |
265 | | reconnect_is_passive(const struct reconnect *fsm) |
266 | 0 | { |
267 | 0 | return fsm->passive; |
268 | 0 | } |
269 | | |
270 | | /* Configures 'fsm' for active or passive mode. In active mode (the default), |
271 | | * the FSM is attempting to connect to a remote host. In passive mode, the FSM |
272 | | * is listening for connections from a remote host. */ |
273 | | void |
274 | | reconnect_set_passive(struct reconnect *fsm, bool passive, long long int now) |
275 | 0 | { |
276 | 0 | if (fsm->passive != passive) { |
277 | 0 | fsm->passive = passive; |
278 | |
|
279 | 0 | if (passive |
280 | 0 | ? fsm->state & (S_CONNECTING | S_RECONNECT) |
281 | 0 | : fsm->state == S_LISTENING && reconnect_may_retry(fsm)) { |
282 | 0 | reconnect_transition__(fsm, now, S_BACKOFF); |
283 | 0 | fsm->backoff = 0; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | /* Returns true if 'fsm' has been enabled with reconnect_enable(). Calling |
289 | | * another function that indicates a change in connection state, such as |
290 | | * reconnect_disconnected() or reconnect_force_reconnect(), will also enable |
291 | | * a reconnect FSM. */ |
292 | | bool |
293 | | reconnect_is_enabled(const struct reconnect *fsm) |
294 | 0 | { |
295 | 0 | return fsm->state != S_VOID; |
296 | 0 | } |
297 | | |
298 | | /* If 'fsm' is disabled (the default for newly created FSMs), enables it, so |
299 | | * that the next call to reconnect_run() for 'fsm' will return |
300 | | * RECONNECT_CONNECT. |
301 | | * |
302 | | * If 'fsm' is not disabled, this function has no effect. */ |
303 | | void |
304 | | reconnect_enable(struct reconnect *fsm, long long int now) |
305 | 0 | { |
306 | 0 | if (fsm->state == S_VOID && reconnect_may_retry(fsm)) { |
307 | 0 | reconnect_transition__(fsm, now, S_BACKOFF); |
308 | 0 | fsm->backoff = 0; |
309 | 0 | } |
310 | 0 | } |
311 | | |
312 | | /* Disables 'fsm'. Until 'fsm' is enabled again, reconnect_run() will always |
313 | | * return 0. */ |
314 | | void |
315 | | reconnect_disable(struct reconnect *fsm, long long int now) |
316 | 0 | { |
317 | 0 | if (fsm->state != S_VOID) { |
318 | 0 | reconnect_transition__(fsm, now, S_VOID); |
319 | 0 | } |
320 | 0 | } |
321 | | |
322 | | /* If 'fsm' is enabled and currently connected (or attempting to connect), |
323 | | * forces reconnect_run() for 'fsm' to return RECONNECT_DISCONNECT the next |
324 | | * time it is called, which should cause the client to drop the connection (or |
325 | | * attempt), back off, and then reconnect. */ |
326 | | void |
327 | | reconnect_force_reconnect(struct reconnect *fsm, long long int now) |
328 | 0 | { |
329 | 0 | if (fsm->state & (S_CONNECTING | S_ACTIVE | S_IDLE)) { |
330 | 0 | reconnect_transition__(fsm, now, S_RECONNECT); |
331 | 0 | } |
332 | 0 | } |
333 | | |
334 | | /* Tell 'fsm' that the connection dropped or that a connection attempt failed. |
335 | | * 'error' specifies the reason: a positive value represents an errno value, |
336 | | * EOF indicates that the connection was closed by the peer (e.g. read() |
337 | | * returned 0), and 0 indicates no specific error. |
338 | | * |
339 | | * The FSM will back off, then reconnect. */ |
340 | | void |
341 | | reconnect_disconnected(struct reconnect *fsm, long long int now, int error) |
342 | 0 | { |
343 | 0 | if (!(fsm->state & (S_BACKOFF | S_VOID))) { |
344 | | /* Report what happened. */ |
345 | 0 | if (fsm->state & (S_ACTIVE | S_IDLE)) { |
346 | 0 | if (error > 0) { |
347 | 0 | VLOG_WARN("%s: connection dropped (%s)", |
348 | 0 | fsm->name, ovs_strerror(error)); |
349 | 0 | } else if (error == EOF) { |
350 | 0 | VLOG(fsm->info, "%s: connection closed by peer", fsm->name); |
351 | 0 | } else { |
352 | 0 | VLOG(fsm->info, "%s: connection dropped", fsm->name); |
353 | 0 | } |
354 | 0 | } else if (fsm->state == S_LISTENING) { |
355 | 0 | if (error > 0) { |
356 | 0 | VLOG_WARN("%s: error listening for connections (%s)", |
357 | 0 | fsm->name, ovs_strerror(error)); |
358 | 0 | } else { |
359 | 0 | VLOG(fsm->info, "%s: error listening for connections", |
360 | 0 | fsm->name); |
361 | 0 | } |
362 | 0 | } else if (fsm->backoff < fsm->max_backoff) { |
363 | 0 | const char *type = fsm->passive ? "listen" : "connection"; |
364 | 0 | if (error > 0) { |
365 | 0 | VLOG_INFO("%s: %s attempt failed (%s)", |
366 | 0 | fsm->name, type, ovs_strerror(error)); |
367 | 0 | } else { |
368 | 0 | VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type); |
369 | 0 | } |
370 | 0 | } else { |
371 | | /* We have reached the maximum backoff, so suppress logging to |
372 | | * avoid wastefully filling the log. (Previously we logged that we |
373 | | * were suppressing further logging, see below.) */ |
374 | 0 | } |
375 | |
|
376 | 0 | if (fsm->state & (S_ACTIVE | S_IDLE)) { |
377 | 0 | fsm->last_disconnected = now; |
378 | 0 | } |
379 | |
|
380 | 0 | if (!reconnect_may_retry(fsm)) { |
381 | 0 | reconnect_transition__(fsm, now, S_VOID); |
382 | 0 | return; |
383 | 0 | } |
384 | | |
385 | | /* Back off. */ |
386 | 0 | if (fsm->backoff_free_tries > 1) { |
387 | 0 | fsm->backoff_free_tries--; |
388 | 0 | fsm->backoff = 0; |
389 | 0 | } else if (fsm->state & (S_ACTIVE | S_IDLE) |
390 | 0 | && (fsm->last_activity - fsm->last_connected >= fsm->backoff |
391 | 0 | || fsm->passive)) { |
392 | 0 | fsm->backoff = fsm->passive ? 0 : fsm->min_backoff; |
393 | 0 | } else { |
394 | 0 | if (fsm->backoff < fsm->min_backoff) { |
395 | 0 | fsm->backoff = fsm->min_backoff; |
396 | 0 | } else if (fsm->backoff < fsm->max_backoff / 2) { |
397 | 0 | fsm->backoff *= 2; |
398 | 0 | VLOG(fsm->info, "%s: waiting %.3g seconds before %s", |
399 | 0 | fsm->name, fsm->backoff / 1000.0, |
400 | 0 | fsm->passive ? "trying to listen again" : "reconnect"); |
401 | 0 | } else { |
402 | 0 | if (fsm->backoff < fsm->max_backoff) { |
403 | 0 | VLOG_INFO("%s: continuing to %s in the background but " |
404 | 0 | "suppressing further logging", fsm->name, |
405 | 0 | fsm->passive ? "try to listen" : "reconnect"); |
406 | 0 | } |
407 | 0 | fsm->backoff = fsm->max_backoff; |
408 | 0 | } |
409 | 0 | } |
410 | 0 | reconnect_transition__(fsm, now, S_BACKOFF); |
411 | 0 | } |
412 | 0 | } |
413 | | |
414 | | /* Tell 'fsm' that a connection or listening attempt is in progress. |
415 | | * |
416 | | * The FSM will start a timer, after which the connection or listening attempt |
417 | | * will be aborted (by returning RECONNECT_DISCONNECT from |
418 | | * reconnect_run()). */ |
419 | | void |
420 | | reconnect_connecting(struct reconnect *fsm, long long int now) |
421 | 0 | { |
422 | 0 | if (fsm->state != S_CONNECTING) { |
423 | 0 | if (fsm->passive) { |
424 | 0 | VLOG(fsm->info, "%s: listening...", fsm->name); |
425 | 0 | } else if (fsm->backoff < fsm->max_backoff) { |
426 | 0 | VLOG(fsm->info, "%s: connecting...", fsm->name); |
427 | 0 | } |
428 | 0 | reconnect_transition__(fsm, now, S_CONNECTING); |
429 | 0 | } |
430 | 0 | } |
431 | | |
432 | | /* Tell 'fsm' that the client is listening for connection attempts. This state |
433 | | * last indefinitely until the client reports some change. |
434 | | * |
435 | | * The natural progression from this state is for the client to report that a |
436 | | * connection has been accepted or is in progress of being accepted, by calling |
437 | | * reconnect_connecting() or reconnect_connected(). |
438 | | * |
439 | | * The client may also report that listening failed (e.g. accept() returned an |
440 | | * unexpected error such as ENOMEM) by calling reconnect_listen_error(), in |
441 | | * which case the FSM will back off and eventually return RECONNECT_CONNECT |
442 | | * from reconnect_run() to tell the client to try listening again. */ |
443 | | void |
444 | | reconnect_listening(struct reconnect *fsm, long long int now) |
445 | 0 | { |
446 | 0 | if (fsm->state != S_LISTENING) { |
447 | 0 | VLOG(fsm->info, "%s: listening...", fsm->name); |
448 | 0 | reconnect_transition__(fsm, now, S_LISTENING); |
449 | 0 | } |
450 | 0 | } |
451 | | |
452 | | /* Tell 'fsm' that the client's attempt to accept a connection failed |
453 | | * (e.g. accept() returned an unexpected error such as ENOMEM). |
454 | | * |
455 | | * If the FSM is currently listening (reconnect_listening() was called), it |
456 | | * will back off and eventually return RECONNECT_CONNECT from reconnect_run() |
457 | | * to tell the client to try listening again. If there is an active |
458 | | * connection, this will be delayed until that connection drops. */ |
459 | | void |
460 | | reconnect_listen_error(struct reconnect *fsm, long long int now, int error) |
461 | 0 | { |
462 | 0 | if (fsm->state == S_LISTENING) { |
463 | 0 | reconnect_disconnected(fsm, now, error); |
464 | 0 | } |
465 | 0 | } |
466 | | |
467 | | /* Tell 'fsm' that the connection was successful. |
468 | | * |
469 | | * The FSM will start the probe interval timer, which is reset by |
470 | | * reconnect_activity(). If the timer expires, a probe will be sent (by |
471 | | * returning RECONNECT_PROBE from reconnect_run()). If the timer expires |
472 | | * again without being reset, the connection will be aborted (by returning |
473 | | * RECONNECT_DISCONNECT from reconnect_run()). */ |
474 | | void |
475 | | reconnect_connected(struct reconnect *fsm, long long int now) |
476 | 0 | { |
477 | 0 | if (!is_connected_state(fsm->state)) { |
478 | 0 | reconnect_connecting(fsm, now); |
479 | |
|
480 | 0 | VLOG(fsm->info, "%s: connected", fsm->name); |
481 | 0 | reconnect_transition__(fsm, now, S_ACTIVE); |
482 | 0 | fsm->last_connected = now; |
483 | 0 | } |
484 | 0 | } |
485 | | |
/* Tells 'fsm' that the connection attempt failed.  'error' is interpreted as
 * for reconnect_disconnected().
 *
 * The FSM backs off and then attempts to reconnect. */
void
reconnect_connect_failed(struct reconnect *fsm, long long int now, int error)
{
    /* Record the attempt itself (a no-op if already in S_CONNECTING)... */
    reconnect_connecting(fsm, now);

    /* ...and then its failure. */
    reconnect_disconnected(fsm, now, error);
}
495 | | |
496 | | /* Tell 'fsm' that some activity has occurred on the connection. This resets |
497 | | * the probe interval timer, so that the connection is known not to be idle. */ |
498 | | void |
499 | | reconnect_activity(struct reconnect *fsm, long long int now) |
500 | 0 | { |
501 | 0 | if (fsm->state == S_IDLE) { |
502 | 0 | reconnect_transition__(fsm, now, S_ACTIVE); |
503 | 0 | } |
504 | 0 | fsm->last_activity = now; |
505 | 0 | } |
506 | | |
507 | | /* Tell 'fsm' that some attempt to receive data on the connection was made at |
508 | | * 'now'. The FSM only allows probe interval timer to expire when some attempt |
509 | | * to receive data on the connection was received after the time when it should |
510 | | * have expired. This helps in the case where there's a long delay in the poll |
511 | | * loop and then reconnect_run() executes before the code to try to receive |
512 | | * anything from the remote runs. (To disable this feature, just call |
513 | | * reconnect_receive_attempted(fsm, LLONG_MAX).) */ |
514 | | void |
515 | | reconnect_receive_attempted(struct reconnect *fsm, long long int now) |
516 | 0 | { |
517 | 0 | fsm->last_receive_attempt = now; |
518 | 0 | } |
519 | | |
520 | | static void |
521 | | reconnect_transition__(struct reconnect *fsm, long long int now, |
522 | | enum state state) |
523 | 0 | { |
524 | 0 | if (fsm->state == S_CONNECTING) { |
525 | 0 | fsm->n_attempted_connections++; |
526 | 0 | if (state == S_ACTIVE) { |
527 | 0 | fsm->n_successful_connections++; |
528 | 0 | } |
529 | 0 | } |
530 | 0 | if (is_connected_state(fsm->state) != is_connected_state(state)) { |
531 | 0 | if (is_connected_state(fsm->state)) { |
532 | 0 | fsm->total_connected_duration += now - fsm->last_connected; |
533 | 0 | } |
534 | 0 | fsm->seqno++; |
535 | 0 | } |
536 | |
|
537 | 0 | VLOG_DBG("%s: entering %s", fsm->name, reconnect_state_name__(state)); |
538 | 0 | fsm->state = state; |
539 | 0 | fsm->state_entered = now; |
540 | 0 | } |
541 | | |
542 | | static long long int |
543 | | reconnect_deadline__(const struct reconnect *fsm, long long int now) |
544 | 0 | { |
545 | 0 | ovs_assert(fsm->state_entered != LLONG_MIN); |
546 | 0 | switch (fsm->state) { |
547 | 0 | case S_VOID: |
548 | 0 | case S_LISTENING: |
549 | 0 | return LLONG_MAX; |
550 | | |
551 | 0 | case S_BACKOFF: |
552 | 0 | return fsm->state_entered + fsm->backoff; |
553 | | |
554 | 0 | case S_CONNECTING: |
555 | 0 | return fsm->state_entered + MAX(1000, fsm->backoff); |
556 | | |
557 | 0 | case S_ACTIVE: |
558 | 0 | if (fsm->probe_interval) { |
559 | 0 | long long int base = MAX(fsm->last_activity, fsm->state_entered); |
560 | 0 | long long int expiration = base + fsm->probe_interval; |
561 | 0 | if (now < expiration || fsm->last_receive_attempt >= expiration) { |
562 | | /* We still have time before the expiration or the time has |
563 | | * already passed and there was no activity. In the first case |
564 | | * we need to wait for the expiration, in the second - we're |
565 | | * already past the deadline. */ |
566 | 0 | return expiration; |
567 | 0 | } else { |
568 | | /* Time has already passed, but we didn't attempt to receive |
569 | | * anything. We need to wake up and try to receive even if |
570 | | * nothing is pending, so we can update the expiration time or |
571 | | * transition to a different state. */ |
572 | 0 | return now + 1; |
573 | 0 | } |
574 | 0 | } |
575 | 0 | return LLONG_MAX; |
576 | | |
577 | 0 | case S_IDLE: |
578 | 0 | if (fsm->probe_interval) { |
579 | 0 | long long int expiration = fsm->state_entered + fsm->probe_interval; |
580 | 0 | if (now < expiration || fsm->last_receive_attempt >= expiration) { |
581 | 0 | return expiration; |
582 | 0 | } else { |
583 | 0 | return now + 1; |
584 | 0 | } |
585 | 0 | } |
586 | 0 | return LLONG_MAX; |
587 | | |
588 | 0 | case S_RECONNECT: |
589 | 0 | return fsm->state_entered; |
590 | 0 | } |
591 | | |
592 | 0 | OVS_NOT_REACHED(); |
593 | 0 | } |
594 | | |
595 | | /* Assesses whether any action should be taken on 'fsm'. The return value is |
596 | | * one of: |
597 | | * |
598 | | * - 0: The client need not take any action. |
599 | | * |
600 | | * - Active client, RECONNECT_CONNECT: The client should start a connection |
601 | | * attempt and indicate this by calling reconnect_connecting(). If the |
602 | | * connection attempt has definitely succeeded, it should call |
603 | | * reconnect_connected(). If the connection attempt has definitely |
604 | | * failed, it should call reconnect_connect_failed(). |
605 | | * |
606 | | * The FSM is smart enough to back off correctly after successful |
607 | | * connections that quickly abort, so it is OK to call |
608 | | * reconnect_connected() after a low-level successful connection |
609 | | * (e.g. connect()) even if the connection might soon abort due to a |
610 | | * failure at a high-level (e.g. SSL negotiation failure). |
611 | | * |
612 | | * - Passive client, RECONNECT_CONNECT: The client should try to listen for |
613 | | * a connection, if it is not already listening. It should call |
614 | | * reconnect_listening() if successful, otherwise reconnect_connecting() |
615 | | * or reconnected_connect_failed() if the attempt is in progress or |
616 | | * definitely failed, respectively. |
617 | | * |
618 | | * A listening passive client should constantly attempt to accept a new |
619 | | * connection and report an accepted connection with |
620 | | * reconnect_connected(). |
621 | | * |
622 | | * - RECONNECT_DISCONNECT: The client should abort the current connection |
623 | | * or connection attempt or listen attempt and call |
624 | | * reconnect_disconnected() or reconnect_connect_failed() to indicate it. |
625 | | * |
626 | | * - RECONNECT_PROBE: The client should send some kind of request to the |
627 | | * peer that will elicit a response, to ensure that the connection is |
628 | | * indeed in working order. (This will only be returned if the "probe |
629 | | * interval" is nonzero--see reconnect_set_probe_interval()). |
630 | | */ |
631 | | enum reconnect_action |
632 | | reconnect_run(struct reconnect *fsm, long long int now) |
633 | 0 | { |
634 | 0 | if (now >= reconnect_deadline__(fsm, now)) { |
635 | 0 | switch (fsm->state) { |
636 | 0 | case S_VOID: |
637 | 0 | return 0; |
638 | | |
639 | 0 | case S_BACKOFF: |
640 | 0 | return RECONNECT_CONNECT; |
641 | | |
642 | 0 | case S_CONNECTING: |
643 | 0 | return RECONNECT_DISCONNECT; |
644 | | |
645 | 0 | case S_ACTIVE: |
646 | 0 | VLOG_DBG("%s: idle %lld ms, sending inactivity probe", fsm->name, |
647 | 0 | now - MAX(fsm->last_activity, fsm->state_entered)); |
648 | 0 | reconnect_transition__(fsm, now, S_IDLE); |
649 | 0 | return RECONNECT_PROBE; |
650 | | |
651 | 0 | case S_IDLE: |
652 | 0 | VLOG_ERR("%s: no response to inactivity probe after %.3g " |
653 | 0 | "seconds, disconnecting", |
654 | 0 | fsm->name, (now - fsm->state_entered) / 1000.0); |
655 | 0 | return RECONNECT_DISCONNECT; |
656 | | |
657 | 0 | case S_RECONNECT: |
658 | 0 | return RECONNECT_DISCONNECT; |
659 | | |
660 | 0 | case S_LISTENING: |
661 | 0 | return 0; |
662 | 0 | } |
663 | | |
664 | 0 | OVS_NOT_REACHED(); |
665 | 0 | } else { |
666 | 0 | return 0; |
667 | 0 | } |
668 | 0 | } |
669 | | |
/* Arranges for the next call to poll_block() to wake up when reconnect_run()
 * should be called on 'fsm'.  Registers nothing if 'fsm' has no pending
 * deadline. */
void
reconnect_wait(struct reconnect *fsm, long long int now)
{
    const int msecs = reconnect_timeout(fsm, now);

    if (msecs < 0) {
        return;
    }
    poll_timer_wait(msecs);
}
680 | | |
/* Returns the number of milliseconds after which reconnect_run() should be
 * called on 'fsm' if nothing else notable happens in the meantime, or a
 * negative number if that is currently unnecessary.  A nonnegative result is
 * in the range 0...INT_MAX; 0 means that the deadline has already arrived. */
int
reconnect_timeout(struct reconnect *fsm, long long int now)
{
    const long long int deadline = reconnect_deadline__(fsm, now);
    long long int remaining;

    if (deadline == LLONG_MAX) {
        return -1;
    }

    remaining = deadline - now;
    if (remaining <= 0) {
        return 0;
    }
    if (remaining > INT_MAX) {
        return INT_MAX;
    }
    return remaining;
}
694 | | |
695 | | /* Returns true if 'fsm' is currently believed to be connected, that is, if |
696 | | * reconnect_connected() was called more recently than any call to |
697 | | * reconnect_connect_failed() or reconnect_disconnected() or |
698 | | * reconnect_disable(), and false otherwise. */ |
699 | | bool |
700 | | reconnect_is_connected(const struct reconnect *fsm) |
701 | 0 | { |
702 | 0 | return is_connected_state(fsm->state); |
703 | 0 | } |
704 | | |
705 | | /* Returns the number of milliseconds since 'fsm' last successfully connected |
706 | | * to its peer (even if it has since disconnected). Returns UINT_MAX if never |
707 | | * connected. */ |
708 | | unsigned int |
709 | | reconnect_get_last_connect_elapsed(const struct reconnect *fsm, |
710 | | long long int now) |
711 | 0 | { |
712 | 0 | return fsm->last_connected == LLONG_MAX ? UINT_MAX |
713 | 0 | : now - fsm->last_connected; |
714 | 0 | } |
715 | | |
716 | | /* Returns the number of milliseconds since 'fsm' last disconnected |
717 | | * from its peer (even if it has since reconnected). Returns UINT_MAX if never |
718 | | * disconnected. */ |
719 | | unsigned int |
720 | | reconnect_get_last_disconnect_elapsed(const struct reconnect *fsm, |
721 | | long long int now) |
722 | 0 | { |
723 | 0 | return fsm->last_disconnected == LLONG_MAX ? UINT_MAX |
724 | 0 | : now - fsm->last_disconnected; |
725 | 0 | } |
726 | | |
727 | | /* Copies various statistics for 'fsm' into '*stats'. */ |
728 | | void |
729 | | reconnect_get_stats(const struct reconnect *fsm, long long int now, |
730 | | struct reconnect_stats *stats) |
731 | 0 | { |
732 | 0 | stats->creation_time = fsm->creation_time; |
733 | 0 | stats->last_activity = fsm->last_activity; |
734 | 0 | stats->last_connected = fsm->last_connected; |
735 | 0 | stats->last_disconnected = fsm->last_disconnected; |
736 | 0 | stats->backoff = fsm->backoff; |
737 | 0 | stats->seqno = fsm->seqno; |
738 | 0 | stats->is_connected = reconnect_is_connected(fsm); |
739 | 0 | stats->msec_since_connect |
740 | 0 | = reconnect_get_last_connect_elapsed(fsm, now); |
741 | 0 | stats->msec_since_disconnect |
742 | 0 | = reconnect_get_last_disconnect_elapsed(fsm, now); |
743 | 0 | stats->total_connected_duration = fsm->total_connected_duration |
744 | 0 | + (is_connected_state(fsm->state) |
745 | 0 | ? reconnect_get_last_connect_elapsed(fsm, now) : 0); |
746 | 0 | stats->n_attempted_connections = fsm->n_attempted_connections; |
747 | 0 | stats->n_successful_connections = fsm->n_successful_connections; |
748 | 0 | stats->state = reconnect_state_name__(fsm->state); |
749 | 0 | stats->state_elapsed = now - fsm->state_entered; |
750 | 0 | } |
751 | | |
752 | | static bool |
753 | | reconnect_may_retry(struct reconnect *fsm) |
754 | 0 | { |
755 | 0 | bool may_retry = fsm->max_tries > 0; |
756 | 0 | if (may_retry && fsm->max_tries != UINT_MAX) { |
757 | 0 | fsm->max_tries--; |
758 | 0 | } |
759 | 0 | return may_retry; |
760 | 0 | } |