/src/ntp-dev/ntpd/ntp_monitor.c
Line | Count | Source |
1 | | /* |
2 | | * ntp_monitor - monitor ntpd statistics |
3 | | */ |
4 | | #ifdef HAVE_CONFIG_H |
5 | | # include <config.h> |
6 | | #endif |
7 | | |
8 | | #include "ntpd.h" |
9 | | #include "ntp_io.h" |
10 | | #include "ntp_if.h" |
11 | | #include "ntp_lists.h" |
12 | | #include "ntp_stdlib.h" |
13 | | #include <ntp_random.h> |
14 | | |
15 | | #include <stdio.h> |
16 | | #include <signal.h> |
17 | | #ifdef HAVE_SYS_IOCTL_H |
18 | | # include <sys/ioctl.h> |
19 | | #endif |
20 | | |
21 | | /* |
22 | | * Record statistics based on source address, mode and version. The |
23 | | * receive procedure calls us with the incoming rbufp before it does |
24 | | * anything else. While at it, implement rate controls for inbound |
25 | | * traffic. |
26 | | * |
27 | | * Each entry is doubly linked into two lists, a hash table and a most- |
28 | | * recently-used (MRU) list. When a packet arrives it is looked up in |
29 | | * the hash table. If found, the statistics are updated and the entry |
30 | | * relinked at the head of the MRU list. If not found, a new entry is |
31 | | * allocated, initialized and linked into both the hash table and at the |
32 | | * head of the MRU list. |
33 | | * |
34 | | * Memory is usually allocated by grabbing a big chunk of new memory and |
35 | | * cutting it up into littler pieces. The exception to this when we hit |
36 | | * the memory limit. Then we free memory by grabbing entries off the |
37 | | * tail for the MRU list, unlinking from the hash table, and |
38 | | * reinitializing. |
39 | | * |
40 | | * INC_MONLIST is the default allocation granularity in entries. |
41 | | * INIT_MONLIST is the default initial allocation in entries. |
42 | | */ |
43 | | #ifdef MONMEMINC /* old name */ |
44 | | # define INC_MONLIST MONMEMINC |
45 | | #elif !defined(INC_MONLIST) |
46 | | # define INC_MONLIST (4 * 1024 / sizeof(mon_entry)) |
47 | | #endif |
48 | | #ifndef INIT_MONLIST |
49 | | # define INIT_MONLIST (4 * 1024 / sizeof(mon_entry)) |
50 | | #endif |
51 | | #ifndef MRU_MAXDEPTH_DEF |
52 | | # define MRU_MAXDEPTH_DEF (1024 * 1024 / sizeof(mon_entry)) |
53 | | #endif |
54 | | |
55 | | /* |
56 | | * Hashing stuff |
57 | | */ |
58 | | u_char mon_hash_bits; |
59 | | |
60 | | /* |
61 | | * Pointers to the hash table and the MRU list. Memory for the hash |
62 | | * table is allocated only if monitoring is enabled. |
63 | | */ |
64 | | mon_entry ** mon_hash; /* MRU hash table */ |
65 | | mon_entry mon_mru_list; /* mru listhead */ |
66 | | |
67 | | /* |
68 | | * List of free structures structures, and counters of in-use and total |
69 | | * structures. The free structures are linked with the hash_next field. |
70 | | */ |
71 | | static mon_entry *mon_free; /* free list or null if none */ |
72 | | u_int mru_alloc; /* mru list + free list count */ |
73 | | u_int mru_entries; /* mru list count */ |
74 | | u_int mru_peakentries; /* highest mru_entries seen */ |
75 | | u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */ |
76 | | u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */ |
77 | | static u_int mon_mem_increments; /* times called malloc() */ |
78 | | |
79 | | /* |
80 | | * Parameters of the RES_LIMITED restriction option. We define headway |
81 | | * as the idle time between packets. A packet is discarded if the |
82 | | * headway is less than the minimum, as well as if the average headway |
83 | | * is less than eight times the increment. |
84 | | */ |
85 | | int ntp_minpkt = NTP_MINPKT; /* minimum seconds between */ |
86 | | /* requests from a client */ |
87 | | u_char ntp_minpoll = NTP_MINPOLL; /* minimum average log2 seconds */ |
88 | | /* between client requests */ |
89 | | |
90 | | /* |
91 | | * Initialization state. We may be monitoring, we may not. If |
92 | | * we aren't, we may not even have allocated any memory yet. |
93 | | */ |
94 | | u_int mon_enabled; /* enable switch */ |
95 | | u_int mru_mindepth = 600; /* preempt above this */ |
96 | | int mru_maxage = 64; /* for entries older than */ |
97 | | u_int mru_maxdepth = /* MRU count hard limit */ |
98 | | MRU_MAXDEPTH_DEF; |
99 | | int mon_age = 3000; /* preemption limit */ |
100 | | |
101 | | static void mon_getmoremem(void); |
102 | | static void remove_from_hash(mon_entry *); |
103 | | static inline void mon_free_entry(mon_entry *); |
104 | | static inline void mon_reclaim_entry(mon_entry *); |
105 | | |
106 | | |
107 | | /* |
108 | | * init_mon - initialize monitoring global data |
109 | | */ |
110 | | void |
111 | | init_mon(void) |
112 | 1 | { |
113 | | /* |
114 | | * Don't do much of anything here. We don't allocate memory |
115 | | * until mon_start(). |
116 | | */ |
117 | 1 | mon_enabled = MON_OFF; |
118 | 1 | INIT_DLIST(mon_mru_list, mru); |
119 | 1 | } |
120 | | |
121 | | |
122 | | /* |
123 | | * remove_from_hash - removes an entry from the address hash table and |
124 | | * decrements mru_entries. |
125 | | */ |
126 | | static void |
127 | | remove_from_hash( |
128 | | mon_entry *mon |
129 | | ) |
130 | 0 | { |
131 | 0 | u_int hash; |
132 | 0 | mon_entry *punlinked; |
133 | |
|
134 | 0 | mru_entries--; |
135 | 0 | hash = MON_HASH(&mon->rmtadr); |
136 | 0 | UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next, |
137 | 0 | mon_entry); |
138 | 0 | ENSURE(punlinked == mon); |
139 | 0 | } |
140 | | |
141 | | |
142 | | static inline void |
143 | | mon_free_entry( |
144 | | mon_entry *m |
145 | | ) |
146 | 0 | { |
147 | 0 | ZERO(*m); |
148 | 0 | LINK_SLIST(mon_free, m, hash_next); |
149 | 0 | } |
150 | | |
151 | | |
152 | | /* |
153 | | * mon_reclaim_entry - Remove an entry from the MRU list and from the |
154 | | * hash array, then zero-initialize it. Indirectly |
155 | | * decrements mru_entries. |
156 | | |
157 | | * The entry is prepared to be reused. Before return, in |
158 | | * remove_from_hash(), mru_entries is decremented. It is the caller's |
159 | | * responsibility to increment it again. |
160 | | */ |
161 | | static inline void |
162 | | mon_reclaim_entry( |
163 | | mon_entry *m |
164 | | ) |
165 | 0 | { |
166 | 0 | DEBUG_INSIST(NULL != m); |
167 | | |
168 | 0 | UNLINK_DLIST(m, mru); |
169 | 0 | remove_from_hash(m); |
170 | 0 | ZERO(*m); |
171 | 0 | } |
172 | | |
173 | | |
174 | | /* |
175 | | * mon_getmoremem - get more memory and put it on the free list |
176 | | */ |
177 | | static void |
178 | | mon_getmoremem(void) |
179 | 0 | { |
180 | 0 | mon_entry *chunk; |
181 | 0 | u_int entries; |
182 | |
|
183 | 0 | entries = (0 == mon_mem_increments) |
184 | 0 | ? mru_initalloc |
185 | 0 | : mru_incalloc; |
186 | |
|
187 | 0 | if (entries) { |
188 | 0 | chunk = eallocarray(entries, sizeof(*chunk)); |
189 | 0 | mru_alloc += entries; |
190 | 0 | for (chunk += entries; entries; entries--) |
191 | 0 | mon_free_entry(--chunk); |
192 | |
|
193 | 0 | mon_mem_increments++; |
194 | 0 | } |
195 | 0 | } |
196 | | |
197 | | |
198 | | /* |
199 | | * mon_start - start up the monitoring software |
200 | | */ |
201 | | void |
202 | | mon_start( |
203 | | int mode |
204 | | ) |
205 | 0 | { |
206 | 0 | size_t octets; |
207 | 0 | u_int min_hash_slots; |
208 | |
|
209 | 0 | if (MON_OFF == mode) /* MON_OFF is 0 */ |
210 | 0 | return; |
211 | 0 | if (mon_enabled) { |
212 | 0 | mon_enabled |= mode; |
213 | 0 | return; |
214 | 0 | } |
215 | 0 | if (0 == mon_mem_increments) |
216 | 0 | mon_getmoremem(); |
217 | | /* |
218 | | * Select the MRU hash table size to limit the average count |
219 | | * per bucket at capacity (mru_maxdepth) to 8, if possible |
220 | | * given our hash is limited to 16 bits. |
221 | | */ |
222 | 0 | min_hash_slots = (mru_maxdepth / 8) + 1; |
223 | 0 | mon_hash_bits = 0; |
224 | 0 | while (min_hash_slots >>= 1) |
225 | 0 | mon_hash_bits++; |
226 | 0 | mon_hash_bits = max(4, mon_hash_bits); |
227 | 0 | mon_hash_bits = min(16, mon_hash_bits); |
228 | 0 | octets = sizeof(*mon_hash) * MON_HASH_SIZE; |
229 | 0 | mon_hash = erealloc_zero(mon_hash, octets, 0); |
230 | |
|
231 | 0 | mon_enabled = mode; |
232 | 0 | } |
233 | | |
234 | | |
235 | | /* |
236 | | * mon_stop - stop the monitoring software |
237 | | */ |
238 | | void |
239 | | mon_stop( |
240 | | int mode |
241 | | ) |
242 | 0 | { |
243 | 0 | mon_entry *mon; |
244 | |
|
245 | 0 | if (MON_OFF == mon_enabled) |
246 | 0 | return; |
247 | 0 | if ((mon_enabled & mode) == 0 || mode == MON_OFF) |
248 | 0 | return; |
249 | | |
250 | 0 | mon_enabled &= ~mode; |
251 | 0 | if (mon_enabled != MON_OFF) |
252 | 0 | return; |
253 | | |
254 | | /* |
255 | | * Move everything on the MRU list to the free list quickly, |
256 | | * without bothering to remove each from either the MRU list or |
257 | | * the hash table. |
258 | | */ |
259 | 0 | ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) |
260 | 0 | mon_free_entry(mon); |
261 | 0 | ITER_DLIST_END() |
262 | | |
263 | | /* empty the MRU list and hash table. */ |
264 | 0 | mru_entries = 0; |
265 | 0 | INIT_DLIST(mon_mru_list, mru); |
266 | 0 | zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE); |
267 | 0 | } |
268 | | |
269 | | |
270 | | /* |
271 | | * mon_clearinterface -- remove mru entries referring to a local address |
272 | | * which is going away. |
273 | | */ |
274 | | void |
275 | | mon_clearinterface( |
276 | | endpt *lcladr |
277 | | ) |
278 | 0 | { |
279 | 0 | mon_entry *mon; |
280 | | |
281 | | /* iterate mon over mon_mru_list */ |
282 | 0 | ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) |
283 | 0 | if (mon->lcladr == lcladr) { |
284 | | /* remove from mru list */ |
285 | 0 | UNLINK_DLIST(mon, mru); |
286 | | /* remove from hash list, adjust mru_entries */ |
287 | 0 | remove_from_hash(mon); |
288 | | /* put on free list */ |
289 | 0 | mon_free_entry(mon); |
290 | 0 | } |
291 | 0 | ITER_DLIST_END() |
292 | 0 | } |
293 | | |
294 | | |
295 | | /* |
296 | | * ntp_monitor - record stats about this packet |
297 | | * |
298 | | * Returns supplied restriction flags, with RES_LIMITED and RES_KOD |
299 | | * cleared unless the packet should not be responded to normally |
300 | | * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD). |
301 | | * The returned flags are saved in the MRU entry, so that it reflects |
302 | | * whether the last packet from that source triggered rate limiting, |
303 | | * and if so, possible KoD response. This implies you can not tell |
304 | | * whether a given address is eligible for rate limiting/KoD from the |
305 | | * monlist restrict bits, only whether or not the last packet triggered |
306 | | * such responses. ntpdc -c reslist lets you see whether RES_LIMITED |
307 | | * or RES_KOD is lit for a particular address before ntp_monitor()'s |
308 | | * typical dousing. |
309 | | */ |
310 | | u_short |
311 | | ntp_monitor( |
312 | | struct recvbuf *rbufp, |
313 | | u_short flags |
314 | | ) |
315 | 0 | { |
316 | 0 | l_fp interval_fp; |
317 | 0 | struct pkt * pkt; |
318 | 0 | mon_entry * mon; |
319 | 0 | mon_entry * oldest; |
320 | 0 | int oldest_age; |
321 | 0 | u_int hash; |
322 | 0 | u_short restrict_mask; |
323 | 0 | u_char mode; |
324 | 0 | u_char version; |
325 | 0 | int interval; |
326 | 0 | int head; /* headway increment */ |
327 | 0 | int leak; /* new headway */ |
328 | 0 | int limit; /* average threshold */ |
329 | |
|
330 | 0 | REQUIRE(rbufp != NULL); |
331 | | |
332 | 0 | if (mon_enabled == MON_OFF) { |
333 | 0 | return ~(RES_LIMITED | RES_KOD) & flags; |
334 | 0 | } |
335 | 0 | pkt = &rbufp->recv_pkt; |
336 | 0 | hash = MON_HASH(&rbufp->recv_srcadr); |
337 | 0 | mode = PKT_MODE(pkt->li_vn_mode); |
338 | 0 | version = PKT_VERSION(pkt->li_vn_mode); |
339 | 0 | mon = mon_hash[hash]; |
340 | | |
341 | | /* |
342 | | * We keep track of all traffic for a given IP in one entry, |
343 | | * otherwise cron'ed ntpdate or similar evades RES_LIMITED. |
344 | | */ |
345 | |
|
346 | 0 | for (; mon != NULL; mon = mon->hash_next) { |
347 | 0 | if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) { |
348 | 0 | break; |
349 | 0 | } |
350 | 0 | } |
351 | 0 | if (mon != NULL) { |
352 | 0 | interval_fp = rbufp->recv_time; |
353 | 0 | L_SUB(&interval_fp, &mon->last); |
354 | | /* add one-half second to round up */ |
355 | 0 | L_ADDUF(&interval_fp, 0x80000000); |
356 | 0 | interval = interval_fp.l_i; |
357 | 0 | mon->last = rbufp->recv_time; |
358 | 0 | NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr); |
359 | 0 | mon->count++; |
360 | 0 | restrict_mask = flags; |
361 | 0 | mon->vn_mode = VN_MODE(version, mode); |
362 | | |
363 | | /* Shuffle to the head of the MRU list. */ |
364 | 0 | UNLINK_DLIST(mon, mru); |
365 | 0 | LINK_DLIST(mon_mru_list, mon, mru); |
366 | | |
367 | | /* |
368 | | * At this point the most recent arrival is first in the |
369 | | * MRU list. Decrease the counter by the headway, but |
370 | | * not less than zero. |
371 | | */ |
372 | 0 | mon->leak -= interval; |
373 | 0 | mon->leak = max(0, mon->leak); |
374 | 0 | head = 1 << ntp_minpoll; |
375 | 0 | leak = mon->leak + head; |
376 | 0 | limit = NTP_SHIFT * head; |
377 | |
|
378 | 0 | DPRINTF(2, ("MRU: interval %d headway %d limit %d\n", |
379 | 0 | interval, leak, limit)); |
380 | | |
381 | | /* |
382 | | * If the minimum and average thresholds are not |
383 | | * exceeded, douse the RES_LIMITED and RES_KOD bits and |
384 | | * increase the counter by the headway increment. Note |
385 | | * that we give a 1-s grace for the minimum threshold |
386 | | * and a 2-s grace for the headway increment. If one or |
387 | | * both thresholds are exceeded and the old counter is |
388 | | * less than the average threshold, set the counter to |
389 | | * the average threshold plus the increment and leave |
390 | | * the RES_LIMITED and RES_KOD bits lit. Otherwise, |
391 | | * leave the counter alone and douse the RES_KOD bit. |
392 | | * This rate-limits the KoDs to no more often than the |
393 | | * average headway. |
394 | | */ |
395 | 0 | if (interval + 1 >= ntp_minpkt && leak < limit) { |
396 | 0 | mon->leak = leak - 2; |
397 | 0 | restrict_mask &= ~(RES_LIMITED | RES_KOD); |
398 | 0 | } else if (mon->leak < limit) { |
399 | 0 | mon->leak = limit + head; |
400 | 0 | } else { |
401 | 0 | restrict_mask &= ~RES_KOD; |
402 | 0 | } |
403 | 0 | mon->flags = restrict_mask; |
404 | |
|
405 | 0 | return mon->flags; |
406 | 0 | } |
407 | | |
408 | | /* |
409 | | * If we got here, this is the first we've heard of this |
410 | | * guy. Get him some memory, either from the free list |
411 | | * or from the tail of the MRU list. |
412 | | * |
413 | | * The following ntp.conf "mru" knobs come into play determining |
414 | | * the depth (or count) of the MRU list: |
415 | | * - mru_mindepth ("mru mindepth") is a floor beneath which |
416 | | * entries are kept without regard to their age. The |
417 | | * default is 600 which matches the longtime implementation |
418 | | * limit on the total number of entries. |
419 | | * - mru_maxage ("mru maxage") is a ceiling on the age in |
420 | | * seconds of entries. Entries older than this are |
421 | | * reclaimed once mon_mindepth is exceeded. 64s default. |
422 | | * Note that entries older than this can easily survive |
423 | | * as they are reclaimed only as needed. |
424 | | * - mru_maxdepth ("mru maxdepth") is a hard limit on the |
425 | | * number of entries. |
426 | | * - "mru maxmem" sets mru_maxdepth to the number of entries |
427 | | * which fit in the given number of kilobytes. The default is |
428 | | * 1024, or 1 megabyte. |
429 | | * - mru_initalloc ("mru initalloc" sets the count of the |
430 | | * initial allocation of MRU entries. |
431 | | * - "mru initmem" sets mru_initalloc in units of kilobytes. |
432 | | * The default is 4. |
433 | | * - mru_incalloc ("mru incalloc" sets the number of entries to |
434 | | * allocate on-demand each time the free list is empty. |
435 | | * - "mru incmem" sets mru_incalloc in units of kilobytes. |
436 | | * The default is 4. |
437 | | * Whichever of "mru maxmem" or "mru maxdepth" occurs last in |
438 | | * ntp.conf controls. Similarly for "mru initalloc" and "mru |
439 | | * initmem", and for "mru incalloc" and "mru incmem". |
440 | | */ |
441 | 0 | if (mru_entries < mru_mindepth) { |
442 | 0 | if (NULL == mon_free) |
443 | 0 | mon_getmoremem(); |
444 | 0 | UNLINK_HEAD_SLIST(mon, mon_free, hash_next); |
445 | 0 | } else { |
446 | 0 | oldest = TAIL_DLIST(mon_mru_list, mru); |
447 | 0 | oldest_age = 0; /* silence uninit warning */ |
448 | 0 | if (oldest != NULL) { |
449 | 0 | interval_fp = rbufp->recv_time; |
450 | 0 | L_SUB(&interval_fp, &oldest->last); |
451 | | /* add one-half second to round up */ |
452 | 0 | L_ADDUF(&interval_fp, 0x80000000); |
453 | 0 | oldest_age = interval_fp.l_i; |
454 | 0 | } |
455 | | /* note -1 is legal for mru_maxage (disables) */ |
456 | 0 | if (oldest != NULL && mru_maxage < oldest_age) { |
457 | 0 | mon_reclaim_entry(oldest); |
458 | 0 | mon = oldest; |
459 | 0 | } else if (mon_free != NULL || mru_alloc < |
460 | 0 | mru_maxdepth) { |
461 | 0 | if (NULL == mon_free) |
462 | 0 | mon_getmoremem(); |
463 | 0 | UNLINK_HEAD_SLIST(mon, mon_free, hash_next); |
464 | | /* Preempt from the MRU list if old enough. */ |
465 | 0 | } else if (ntp_uurandom() > |
466 | 0 | (double)oldest_age / mon_age) { |
467 | 0 | return ~(RES_LIMITED | RES_KOD) & flags; |
468 | 0 | } else { |
469 | 0 | mon_reclaim_entry(oldest); |
470 | 0 | mon = oldest; |
471 | 0 | } |
472 | 0 | } |
473 | | |
474 | 0 | INSIST(mon != NULL); |
475 | | |
476 | | /* |
477 | | * Got one, initialize it |
478 | | */ |
479 | 0 | mru_entries++; |
480 | 0 | mru_peakentries = max(mru_peakentries, mru_entries); |
481 | 0 | mon->last = rbufp->recv_time; |
482 | 0 | mon->first = mon->last; |
483 | 0 | mon->count = 1; |
484 | 0 | mon->flags = ~(RES_LIMITED | RES_KOD) & flags; |
485 | 0 | mon->leak = 0; |
486 | 0 | memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr)); |
487 | 0 | mon->vn_mode = VN_MODE(version, mode); |
488 | 0 | mon->lcladr = rbufp->dstadr; |
489 | 0 | mon->cast_flags = (u_char)(((rbufp->dstadr->flags & |
490 | 0 | INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST |
491 | 0 | : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST); |
492 | | |
493 | | /* |
494 | | * Drop him into front of the hash table. Also put him on top of |
495 | | * the MRU list. |
496 | | */ |
497 | 0 | LINK_SLIST(mon_hash[hash], mon, hash_next); |
498 | 0 | LINK_DLIST(mon_mru_list, mon, mru); |
499 | |
|
500 | 0 | return mon->flags; |
501 | 0 | } |
502 | | |
503 | | |