Home | History | Annotate | Line # | Download | only in ntpd
ntp_monitor.c revision 1.1.1.2.2.2
      1 /*	$NetBSD: ntp_monitor.c,v 1.1.1.2.2.2 2015/11/07 22:26:36 snj Exp $	*/
      2 
      3 /*
      4  * ntp_monitor - monitor ntpd statistics
      5  */
      6 #ifdef HAVE_CONFIG_H
      7 # include <config.h>
      8 #endif
      9 
     10 #include "ntpd.h"
     11 #include "ntp_io.h"
     12 #include "ntp_if.h"
     13 #include "ntp_lists.h"
     14 #include "ntp_stdlib.h"
     15 #include <ntp_random.h>
     16 
     17 #include <stdio.h>
     18 #include <signal.h>
     19 #ifdef HAVE_SYS_IOCTL_H
     20 # include <sys/ioctl.h>
     21 #endif
     22 
/*
 * Record statistics based on source address, mode and version.  The
 * receive procedure calls us with the incoming rbufp before it does
 * anything else.  While at it, implement rate controls for inbound
 * traffic.
 *
 * Every entry lives on two lists at once: a hash table bucket chain
 * and a most-recently-used (MRU) list.  When a packet arrives its
 * source is looked up in the hash table.  If found, the statistics are
 * updated and the entry is relinked at the head of the MRU list.  If
 * not found, a new entry is obtained, initialized, and linked into
 * both the hash table and the head of the MRU list.
 *
 * Memory is usually allocated by grabbing a big chunk of new memory
 * and cutting it up into smaller pieces.  The exception is when we hit
 * the memory limit: then we recycle memory by taking entries off the
 * tail of the MRU list, unlinking them from the hash table, and
 * reinitializing them.
 *
 * INC_MONLIST is the default allocation granularity in entries.
 * INIT_MONLIST is the default initial allocation in entries.
 * MRU_MAXDEPTH_DEF is the default hard limit on the entry count.
 */
#if defined(MONMEMINC)		/* old name */
# define	INC_MONLIST	MONMEMINC
#elif !defined(INC_MONLIST)
# define	INC_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif

#if !defined(INIT_MONLIST)
# define	INIT_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif

#if !defined(MRU_MAXDEPTH_DEF)
# define MRU_MAXDEPTH_DEF	(1024 * 1024 / sizeof(mon_entry))
#endif
     56 
     57 /*
     58  * Hashing stuff
     59  */
     60 u_char	mon_hash_bits;
     61 
     62 /*
     63  * Pointers to the hash table and the MRU list.  Memory for the hash
     64  * table is allocated only if monitoring is enabled.
     65  */
     66 mon_entry **	mon_hash;	/* MRU hash table */
     67 mon_entry	mon_mru_list;	/* mru listhead */
     68 
     69 /*
     70  * List of free structures structures, and counters of in-use and total
     71  * structures. The free structures are linked with the hash_next field.
     72  */
     73 static  mon_entry *mon_free;		/* free list or null if none */
     74 	u_int mru_alloc;		/* mru list + free list count */
     75 	u_int mru_entries;		/* mru list count */
     76 	u_int mru_peakentries;		/* highest mru_entries seen */
     77 	u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
     78 	u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
     79 static	u_int mon_mem_increments;	/* times called malloc() */
     80 
     81 /*
     82  * Parameters of the RES_LIMITED restriction option. We define headway
     83  * as the idle time between packets. A packet is discarded if the
     84  * headway is less than the minimum, as well as if the average headway
     85  * is less than eight times the increment.
     86  */
     87 int	ntp_minpkt = NTP_MINPKT;	/* minimum (log 2 s) */
     88 u_char	ntp_minpoll = NTP_MINPOLL;	/* increment (log 2 s) */
     89 
     90 /*
     91  * Initialization state.  We may be monitoring, we may not.  If
     92  * we aren't, we may not even have allocated any memory yet.
     93  */
     94 	u_int	mon_enabled;		/* enable switch */
     95 	u_int	mru_mindepth = 600;	/* preempt above this */
     96 	int	mru_maxage = 64;	/* for entries older than */
     97 	u_int	mru_maxdepth = 		/* MRU count hard limit */
     98 			MRU_MAXDEPTH_DEF;
     99 	int	mon_age = 3000;		/* preemption limit */
    100 
    101 static	void		mon_getmoremem(void);
    102 static	void		remove_from_hash(mon_entry *);
    103 static	inline void	mon_free_entry(mon_entry *);
    104 static	inline void	mon_reclaim_entry(mon_entry *);
    105 
    106 
    107 /*
    108  * init_mon - initialize monitoring global data
    109  */
    110 void
    111 init_mon(void)
    112 {
    113 	/*
    114 	 * Don't do much of anything here.  We don't allocate memory
    115 	 * until mon_start().
    116 	 */
    117 	mon_enabled = MON_OFF;
    118 	INIT_DLIST(mon_mru_list, mru);
    119 }
    120 
    121 
    122 /*
    123  * remove_from_hash - removes an entry from the address hash table and
    124  *		      decrements mru_entries.
    125  */
    126 static void
    127 remove_from_hash(
    128 	mon_entry *mon
    129 	)
    130 {
    131 	u_int hash;
    132 	mon_entry *punlinked;
    133 
    134 	mru_entries--;
    135 	hash = MON_HASH(&mon->rmtadr);
    136 	UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
    137 		     mon_entry);
    138 	ENSURE(punlinked == mon);
    139 }
    140 
    141 
    142 static inline void
    143 mon_free_entry(
    144 	mon_entry *m
    145 	)
    146 {
    147 	ZERO(*m);
    148 	LINK_SLIST(mon_free, m, hash_next);
    149 }
    150 
    151 
    152 /*
    153  * mon_reclaim_entry - Remove an entry from the MRU list and from the
    154  *		       hash array, then zero-initialize it.  Indirectly
    155  *		       decrements mru_entries.
    156 
    157  * The entry is prepared to be reused.  Before return, in
    158  * remove_from_hash(), mru_entries is decremented.  It is the caller's
    159  * responsibility to increment it again.
    160  */
    161 static inline void
    162 mon_reclaim_entry(
    163 	mon_entry *m
    164 	)
    165 {
    166 	DEBUG_INSIST(NULL != m);
    167 
    168 	UNLINK_DLIST(m, mru);
    169 	remove_from_hash(m);
    170 	ZERO(*m);
    171 }
    172 
    173 
    174 /*
    175  * mon_getmoremem - get more memory and put it on the free list
    176  */
    177 static void
    178 mon_getmoremem(void)
    179 {
    180 	mon_entry *chunk;
    181 	u_int entries;
    182 
    183 	entries = (0 == mon_mem_increments)
    184 		      ? mru_initalloc
    185 		      : mru_incalloc;
    186 
    187 	if (entries) {
    188 		chunk = eallocarray(entries, sizeof(*chunk));
    189 		mru_alloc += entries;
    190 		for (chunk += entries; entries; entries--)
    191 			mon_free_entry(--chunk);
    192 
    193 		mon_mem_increments++;
    194 	}
    195 }
    196 
    197 
    198 /*
    199  * mon_start - start up the monitoring software
    200  */
    201 void
    202 mon_start(
    203 	int mode
    204 	)
    205 {
    206 	size_t octets;
    207 	u_int min_hash_slots;
    208 
    209 	if (MON_OFF == mode)		/* MON_OFF is 0 */
    210 		return;
    211 	if (mon_enabled) {
    212 		mon_enabled |= mode;
    213 		return;
    214 	}
    215 	if (0 == mon_mem_increments)
    216 		mon_getmoremem();
    217 	/*
    218 	 * Select the MRU hash table size to limit the average count
    219 	 * per bucket at capacity (mru_maxdepth) to 8, if possible
    220 	 * given our hash is limited to 16 bits.
    221 	 */
    222 	min_hash_slots = (mru_maxdepth / 8) + 1;
    223 	mon_hash_bits = 0;
    224 	while (min_hash_slots >>= 1)
    225 		mon_hash_bits++;
    226 	mon_hash_bits = max(4, mon_hash_bits);
    227 	mon_hash_bits = min(16, mon_hash_bits);
    228 	octets = sizeof(*mon_hash) * MON_HASH_SIZE;
    229 	mon_hash = erealloc_zero(mon_hash, octets, 0);
    230 
    231 	mon_enabled = mode;
    232 }
    233 
    234 
    235 /*
    236  * mon_stop - stop the monitoring software
    237  */
    238 void
    239 mon_stop(
    240 	int mode
    241 	)
    242 {
    243 	mon_entry *mon;
    244 
    245 	if (MON_OFF == mon_enabled)
    246 		return;
    247 	if ((mon_enabled & mode) == 0 || mode == MON_OFF)
    248 		return;
    249 
    250 	mon_enabled &= ~mode;
    251 	if (mon_enabled != MON_OFF)
    252 		return;
    253 
    254 	/*
    255 	 * Move everything on the MRU list to the free list quickly,
    256 	 * without bothering to remove each from either the MRU list or
    257 	 * the hash table.
    258 	 */
    259 	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
    260 		mon_free_entry(mon);
    261 	ITER_DLIST_END()
    262 
    263 	/* empty the MRU list and hash table. */
    264 	mru_entries = 0;
    265 	INIT_DLIST(mon_mru_list, mru);
    266 	zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
    267 }
    268 
    269 
    270 /*
    271  * mon_clearinterface -- remove mru entries referring to a local address
    272  *			 which is going away.
    273  */
    274 void
    275 mon_clearinterface(
    276 	endpt *lcladr
    277 	)
    278 {
    279 	mon_entry *mon;
    280 
    281 	/* iterate mon over mon_mru_list */
    282 	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
    283 		if (mon->lcladr == lcladr) {
    284 			/* remove from mru list */
    285 			UNLINK_DLIST(mon, mru);
    286 			/* remove from hash list, adjust mru_entries */
    287 			remove_from_hash(mon);
    288 			/* put on free list */
    289 			mon_free_entry(mon);
    290 		}
    291 	ITER_DLIST_END()
    292 }
    293 
    294 
    295 /*
    296  * ntp_monitor - record stats about this packet
    297  *
    298  * Returns supplied restriction flags, with RES_LIMITED and RES_KOD
    299  * cleared unless the packet should not be responded to normally
    300  * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
    301  * The returned flags are saved in the MRU entry, so that it reflects
    302  * whether the last packet from that source triggered rate limiting,
    303  * and if so, possible KoD response.  This implies you can not tell
    304  * whether a given address is eligible for rate limiting/KoD from the
    305  * monlist restrict bits, only whether or not the last packet triggered
    306  * such responses.  ntpdc -c reslist lets you see whether RES_LIMITED
    307  * or RES_KOD is lit for a particular address before ntp_monitor()'s
    308  * typical dousing.
    309  */
    310 u_short
    311 ntp_monitor(
    312 	struct recvbuf *rbufp,
    313 	u_short	flags
    314 	)
    315 {
    316 	l_fp		interval_fp;
    317 	struct pkt *	pkt;
    318 	mon_entry *	mon;
    319 	mon_entry *	oldest;
    320 	int		oldest_age;
    321 	u_int		hash;
    322 	u_short		restrict_mask;
    323 	u_char		mode;
    324 	u_char		version;
    325 	int		interval;
    326 	int		head;		/* headway increment */
    327 	int		leak;		/* new headway */
    328 	int		limit;		/* average threshold */
    329 
    330 	REQUIRE(rbufp != NULL);
    331 
    332 	if (mon_enabled == MON_OFF)
    333 		return ~(RES_LIMITED | RES_KOD) & flags;
    334 
    335 	pkt = &rbufp->recv_pkt;
    336 	hash = MON_HASH(&rbufp->recv_srcadr);
    337 	mode = PKT_MODE(pkt->li_vn_mode);
    338 	version = PKT_VERSION(pkt->li_vn_mode);
    339 	mon = mon_hash[hash];
    340 
    341 	/*
    342 	 * We keep track of all traffic for a given IP in one entry,
    343 	 * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
    344 	 */
    345 
    346 	for (; mon != NULL; mon = mon->hash_next)
    347 		if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr))
    348 			break;
    349 
    350 	if (mon != NULL) {
    351 		interval_fp = rbufp->recv_time;
    352 		L_SUB(&interval_fp, &mon->last);
    353 		/* add one-half second to round up */
    354 		L_ADDUF(&interval_fp, 0x80000000);
    355 		interval = interval_fp.l_i;
    356 		mon->last = rbufp->recv_time;
    357 		NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
    358 		mon->count++;
    359 		restrict_mask = flags;
    360 		mon->vn_mode = VN_MODE(version, mode);
    361 
    362 		/* Shuffle to the head of the MRU list. */
    363 		UNLINK_DLIST(mon, mru);
    364 		LINK_DLIST(mon_mru_list, mon, mru);
    365 
    366 		/*
    367 		 * At this point the most recent arrival is first in the
    368 		 * MRU list.  Decrease the counter by the headway, but
    369 		 * not less than zero.
    370 		 */
    371 		mon->leak -= interval;
    372 		mon->leak = max(0, mon->leak);
    373 		head = 1 << ntp_minpoll;
    374 		leak = mon->leak + head;
    375 		limit = NTP_SHIFT * head;
    376 
    377 		DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
    378 			    interval, leak, limit));
    379 
    380 		/*
    381 		 * If the minimum and average thresholds are not
    382 		 * exceeded, douse the RES_LIMITED and RES_KOD bits and
    383 		 * increase the counter by the headway increment.  Note
    384 		 * that we give a 1-s grace for the minimum threshold
    385 		 * and a 2-s grace for the headway increment.  If one or
    386 		 * both thresholds are exceeded and the old counter is
    387 		 * less than the average threshold, set the counter to
    388 		 * the average threshold plus the increment and leave
    389 		 * the RES_LIMITED and RES_KOD bits lit. Otherwise,
    390 		 * leave the counter alone and douse the RES_KOD bit.
    391 		 * This rate-limits the KoDs to no less than the average
    392 		 * headway.
    393 		 */
    394 		if (interval + 1 >= ntp_minpkt && leak < limit) {
    395 			mon->leak = leak - 2;
    396 			restrict_mask &= ~(RES_LIMITED | RES_KOD);
    397 		} else if (mon->leak < limit)
    398 			mon->leak = limit + head;
    399 		else
    400 			restrict_mask &= ~RES_KOD;
    401 
    402 		mon->flags = restrict_mask;
    403 
    404 		return mon->flags;
    405 	}
    406 
    407 	/*
    408 	 * If we got here, this is the first we've heard of this
    409 	 * guy.  Get him some memory, either from the free list
    410 	 * or from the tail of the MRU list.
    411 	 *
    412 	 * The following ntp.conf "mru" knobs come into play determining
    413 	 * the depth (or count) of the MRU list:
    414 	 * - mru_mindepth ("mru mindepth") is a floor beneath which
    415 	 *   entries are kept without regard to their age.  The
    416 	 *   default is 600 which matches the longtime implementation
    417 	 *   limit on the total number of entries.
    418 	 * - mru_maxage ("mru maxage") is a ceiling on the age in
    419 	 *   seconds of entries.  Entries older than this are
    420 	 *   reclaimed once mon_mindepth is exceeded.  64s default.
    421 	 *   Note that entries older than this can easily survive
    422 	 *   as they are reclaimed only as needed.
    423 	 * - mru_maxdepth ("mru maxdepth") is a hard limit on the
    424 	 *   number of entries.
    425 	 * - "mru maxmem" sets mru_maxdepth to the number of entries
    426 	 *   which fit in the given number of kilobytes.  The default is
    427 	 *   1024, or 1 megabyte.
    428 	 * - mru_initalloc ("mru initalloc" sets the count of the
    429 	 *   initial allocation of MRU entries.
    430 	 * - "mru initmem" sets mru_initalloc in units of kilobytes.
    431 	 *   The default is 4.
    432 	 * - mru_incalloc ("mru incalloc" sets the number of entries to
    433 	 *   allocate on-demand each time the free list is empty.
    434 	 * - "mru incmem" sets mru_incalloc in units of kilobytes.
    435 	 *   The default is 4.
    436 	 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
    437 	 * ntp.conf controls.  Similarly for "mru initalloc" and "mru
    438 	 * initmem", and for "mru incalloc" and "mru incmem".
    439 	 */
    440 	if (mru_entries < mru_mindepth) {
    441 		if (NULL == mon_free)
    442 			mon_getmoremem();
    443 		UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
    444 	} else {
    445 		oldest = TAIL_DLIST(mon_mru_list, mru);
    446 		oldest_age = 0;		/* silence uninit warning */
    447 		if (oldest != NULL) {
    448 			interval_fp = rbufp->recv_time;
    449 			L_SUB(&interval_fp, &oldest->last);
    450 			/* add one-half second to round up */
    451 			L_ADDUF(&interval_fp, 0x80000000);
    452 			oldest_age = interval_fp.l_i;
    453 		}
    454 		/* note -1 is legal for mru_maxage (disables) */
    455 		if (oldest != NULL && mru_maxage < oldest_age) {
    456 			mon_reclaim_entry(oldest);
    457 			mon = oldest;
    458 		} else if (mon_free != NULL || mru_alloc <
    459 			   mru_maxdepth) {
    460 			if (NULL == mon_free)
    461 				mon_getmoremem();
    462 			UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
    463 		/* Preempt from the MRU list if old enough. */
    464 		} else if (ntp_random() / (2. * FRAC) >
    465 			   (double)oldest_age / mon_age) {
    466 			return ~(RES_LIMITED | RES_KOD) & flags;
    467 		} else {
    468 			mon_reclaim_entry(oldest);
    469 			mon = oldest;
    470 		}
    471 	}
    472 
    473 	INSIST(mon != NULL);
    474 
    475 	/*
    476 	 * Got one, initialize it
    477 	 */
    478 	mru_entries++;
    479 	mru_peakentries = max(mru_peakentries, mru_entries);
    480 	mon->last = rbufp->recv_time;
    481 	mon->first = mon->last;
    482 	mon->count = 1;
    483 	mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
    484 	mon->leak = 0;
    485 	memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
    486 	mon->vn_mode = VN_MODE(version, mode);
    487 	mon->lcladr = rbufp->dstadr;
    488 	mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
    489 	    INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
    490 	    : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);
    491 
    492 	/*
    493 	 * Drop him into front of the hash table. Also put him on top of
    494 	 * the MRU list.
    495 	 */
    496 	LINK_SLIST(mon_hash[hash], mon, hash_next);
    497 	LINK_DLIST(mon_mru_list, mon, mru);
    498 
    499 	return mon->flags;
    500 }
    501 
    502 
    503