/*
 * ntp_monitor - monitor ntpd statistics
 */

/*
 * NOTE(review): the bare #include directives below carry no header
 * name in this copy of the file; the names must be restored before
 * this can build.
 */
#ifdef HAVE_CONFIG_H
# include
#endif

#include "ntpd.h"
#include "ntp_io.h"
#include "ntp_if.h"
#include "ntp_lists.h"
#include "ntp_stdlib.h"
#include

#include
#include
#ifdef HAVE_SYS_IOCTL_H
# include
#endif

/*
 * Record statistics based on source address, mode and version.  The
 * receive procedure calls us with the incoming rbufp before it does
 * anything else.  While at it, implement rate controls for inbound
 * traffic.
 *
 * Each entry is on two lists at once: a hash table bucket and a most-
 * recently-used (MRU) list.  When a packet arrives it is looked up in
 * the hash table.  If found, the statistics are updated and the entry
 * is relinked at the head of the MRU list.  If not found, a new entry
 * is allocated, initialized and linked into both the hash table and
 * the head of the MRU list.
 *
 * Memory is usually allocated by grabbing a big chunk of new memory and
 * cutting it up into smaller pieces.  The exception is when we hit
 * the memory limit.  Then we free memory by grabbing entries off the
 * tail of the MRU list, unlinking them from the hash table, and
 * reinitializing them.
 *
 * INC_MONLIST is the default allocation granularity in entries.
 * INIT_MONLIST is the default initial allocation in entries.
 * Both default to the number of mon_entry structures that fit in
 * 4 * 1024 bytes.  A MONMEMINC definition (the old name), when
 * present, supplies INC_MONLIST.  MRU_MAXDEPTH_DEF, the default for
 * mru_maxdepth, is the number of mon_entry structures that fit in
 * 1024 * 1024 bytes.
 */
#ifdef MONMEMINC		/* old name */
# define	INC_MONLIST	MONMEMINC
#elif !defined(INC_MONLIST)
# define	INC_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef INIT_MONLIST
# define	INIT_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef MRU_MAXDEPTH_DEF
# define MRU_MAXDEPTH_DEF	(1024 * 1024 / sizeof(mon_entry))
#endif

/*
 * Hashing stuff.  mon_hash_bits is chosen by mon_start(), which keeps
 * it between 4 and 16.
 */
u_char	mon_hash_bits;

/*
 * Pointers to the hash table and the MRU list.  Memory for the hash
 * table is allocated only if monitoring is enabled (see mon_start()).
 */
mon_entry **	mon_hash;	/* MRU hash table */
mon_entry	mon_mru_list;	/* mru listhead */

/*
 * List of free structures, and counters of in-use and total
 * structures.  The free structures are linked with the hash_next field.
*/
static  mon_entry *mon_free;		/* free list or null if none */
	u_int mru_alloc;		/* mru list + free list count; */
					/* grown by mon_getmoremem() */
	u_int mru_entries;		/* mru list count */
	u_int mru_peakentries;		/* highest mru_entries seen */
	u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
					/* (size of the first batch) */
	u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
					/* (size of every later batch) */
static	u_int mon_mem_increments;	/* number of batches allocated */
					/* by mon_getmoremem(); 0 means */
					/* the first batch is still due */

/*
 * Parameters of the RES_LIMITED restriction option. We define headway
 * as the idle time between packets. A packet is discarded if the
 * headway is less than the minimum, as well as if the average headway
 * is less than eight times the increment.
 *
 * ntp_monitor() applies a 1-s grace to the minimum (it tests
 * interval + 1 >= ntp_minpkt) and uses 1 << ntp_minpoll as the
 * headway increment.
 */
int	ntp_minpkt = NTP_MINPKT;	/* minimum seconds between */
					/* requests from a client */
u_char	ntp_minpoll = NTP_MINPOLL;	/* minimum average log2 seconds */
					/* between client requests */

/*
 * Initialization state.  We may be monitoring, we may not.  If
 * we aren't, we may not even have allocated any memory yet.
 */
	u_int	mon_enabled;		/* enable switch: MON_OFF, or the */
					/* OR of the modes given to */
					/* mon_start() and not yet stopped */
	u_int	mru_mindepth = 600;	/* preempt above this; while */
					/* mru_entries is below it a new */
					/* source always gets a free entry */
	int	mru_maxage = 64;	/* for entries older than; the */
					/* oldest entry is reclaimed when */
					/* its age in seconds exceeds this */
	u_int	mru_maxdepth = 		/* MRU count hard limit */
			MRU_MAXDEPTH_DEF;
	int	mon_age = 3000;		/* preemption limit: ntp_monitor() */
					/* weighs oldest_age / mon_age */
					/* against a random draw */

/* Helpers defined later in this file. */
static	void		mon_getmoremem(void);
static	void		remove_from_hash(mon_entry *);
static	inline void	mon_free_entry(mon_entry *);
static	inline void	mon_reclaim_entry(mon_entry *);


/*
 * init_mon - initialize monitoring global data
 *
 * Marks monitoring as off and makes the MRU list empty.  No memory
 * is allocated here.
 */
void
init_mon(void)
{
	/*
	 * Don't do much of anything here.  We don't allocate memory
	 * until mon_start().
	 */
	mon_enabled = MON_OFF;
	INIT_DLIST(mon_mru_list, mru);
}


/*
 * remove_from_hash - removes an entry from the address hash table and
 *		      decrements mru_entries.
*/ static void remove_from_hash( mon_entry *mon ) { u_int hash; mon_entry *punlinked; mru_entries--; hash = MON_HASH(&mon->rmtadr); UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next, mon_entry); ENSURE(punlinked == mon); } static inline void mon_free_entry( mon_entry *m ) { ZERO(*m); LINK_SLIST(mon_free, m, hash_next); } /* * mon_reclaim_entry - Remove an entry from the MRU list and from the * hash array, then zero-initialize it. Indirectly * decrements mru_entries. * The entry is prepared to be reused. Before return, in * remove_from_hash(), mru_entries is decremented. It is the caller's * responsibility to increment it again. */ static inline void mon_reclaim_entry( mon_entry *m ) { DEBUG_INSIST(NULL != m); UNLINK_DLIST(m, mru); remove_from_hash(m); ZERO(*m); } /* * mon_getmoremem - get more memory and put it on the free list */ static void mon_getmoremem(void) { mon_entry *chunk; u_int entries; entries = (0 == mon_mem_increments) ? mru_initalloc : mru_incalloc; if (entries) { chunk = eallocarray(entries, sizeof(*chunk)); mru_alloc += entries; for (chunk += entries; entries; entries--) mon_free_entry(--chunk); mon_mem_increments++; } } /* * mon_start - start up the monitoring software */ void mon_start( int mode ) { size_t octets; u_int min_hash_slots; if (MON_OFF == mode) /* MON_OFF is 0 */ return; if (mon_enabled) { mon_enabled |= mode; return; } if (0 == mon_mem_increments) mon_getmoremem(); /* * Select the MRU hash table size to limit the average count * per bucket at capacity (mru_maxdepth) to 8, if possible * given our hash is limited to 16 bits. 
*/ min_hash_slots = (mru_maxdepth / 8) + 1; mon_hash_bits = 0; while (min_hash_slots >>= 1) mon_hash_bits++; mon_hash_bits = max(4, mon_hash_bits); mon_hash_bits = min(16, mon_hash_bits); octets = sizeof(*mon_hash) * MON_HASH_SIZE; mon_hash = erealloc_zero(mon_hash, octets, 0); mon_enabled = mode; } /* * mon_stop - stop the monitoring software */ void mon_stop( int mode ) { mon_entry *mon; if (MON_OFF == mon_enabled) return; if ((mon_enabled & mode) == 0 || mode == MON_OFF) return; mon_enabled &= ~mode; if (mon_enabled != MON_OFF) return; /* * Move everything on the MRU list to the free list quickly, * without bothering to remove each from either the MRU list or * the hash table. */ ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) mon_free_entry(mon); ITER_DLIST_END() /* empty the MRU list and hash table. */ mru_entries = 0; INIT_DLIST(mon_mru_list, mru); zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE); } /* * mon_clearinterface -- remove mru entries referring to a local address * which is going away. */ void mon_clearinterface( endpt *lcladr ) { mon_entry *mon; /* iterate mon over mon_mru_list */ ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) if (mon->lcladr == lcladr) { /* remove from mru list */ UNLINK_DLIST(mon, mru); /* remove from hash list, adjust mru_entries */ remove_from_hash(mon); /* put on free list */ mon_free_entry(mon); } ITER_DLIST_END() } /* * ntp_monitor - record stats about this packet * * Returns supplied restriction flags, with RES_LIMITED and RES_KOD * cleared unless the packet should not be responded to normally * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD). * The returned flags are saved in the MRU entry, so that it reflects * whether the last packet from that source triggered rate limiting, * and if so, possible KoD response. 
This implies you can not tell * whether a given address is eligible for rate limiting/KoD from the * monlist restrict bits, only whether or not the last packet triggered * such responses. ntpdc -c reslist lets you see whether RES_LIMITED * or RES_KOD is lit for a particular address before ntp_monitor()'s * typical dousing. */ u_short ntp_monitor( struct recvbuf *rbufp, u_short flags ) { l_fp interval_fp; struct pkt * pkt; mon_entry * mon; mon_entry * oldest; int oldest_age; u_int hash; u_short restrict_mask; u_char mode; u_char version; int interval; int head; /* headway increment */ int leak; /* new headway */ int limit; /* average threshold */ REQUIRE(rbufp != NULL); if (mon_enabled == MON_OFF) return ~(RES_LIMITED | RES_KOD) & flags; pkt = &rbufp->recv_pkt; hash = MON_HASH(&rbufp->recv_srcadr); mode = PKT_MODE(pkt->li_vn_mode); version = PKT_VERSION(pkt->li_vn_mode); mon = mon_hash[hash]; /* * We keep track of all traffic for a given IP in one entry, * otherwise cron'ed ntpdate or similar evades RES_LIMITED. */ for (; mon != NULL; mon = mon->hash_next) if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) break; if (mon != NULL) { interval_fp = rbufp->recv_time; L_SUB(&interval_fp, &mon->last); /* add one-half second to round up */ L_ADDUF(&interval_fp, 0x80000000); interval = interval_fp.l_i; mon->last = rbufp->recv_time; NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr); mon->count++; restrict_mask = flags; mon->vn_mode = VN_MODE(version, mode); /* Shuffle to the head of the MRU list. */ UNLINK_DLIST(mon, mru); LINK_DLIST(mon_mru_list, mon, mru); /* * At this point the most recent arrival is first in the * MRU list. Decrease the counter by the headway, but * not less than zero. 
*/ mon->leak -= interval; mon->leak = max(0, mon->leak); head = 1 << ntp_minpoll; leak = mon->leak + head; limit = NTP_SHIFT * head; DPRINTF(2, ("MRU: interval %d headway %d limit %d\n", interval, leak, limit)); /* * If the minimum and average thresholds are not * exceeded, douse the RES_LIMITED and RES_KOD bits and * increase the counter by the headway increment. Note * that we give a 1-s grace for the minimum threshold * and a 2-s grace for the headway increment. If one or * both thresholds are exceeded and the old counter is * less than the average threshold, set the counter to * the average threshold plus the increment and leave * the RES_LIMITED and RES_KOD bits lit. Otherwise, * leave the counter alone and douse the RES_KOD bit. * This rate-limits the KoDs to no less than the average * headway. */ if (interval + 1 >= ntp_minpkt && leak < limit) { mon->leak = leak - 2; restrict_mask &= ~(RES_LIMITED | RES_KOD); } else if (mon->leak < limit) mon->leak = limit + head; else restrict_mask &= ~RES_KOD; mon->flags = restrict_mask; return mon->flags; } /* * If we got here, this is the first we've heard of this * guy. Get him some memory, either from the free list * or from the tail of the MRU list. * * The following ntp.conf "mru" knobs come into play determining * the depth (or count) of the MRU list: * - mru_mindepth ("mru mindepth") is a floor beneath which * entries are kept without regard to their age. The * default is 600 which matches the longtime implementation * limit on the total number of entries. * - mru_maxage ("mru maxage") is a ceiling on the age in * seconds of entries. Entries older than this are * reclaimed once mon_mindepth is exceeded. 64s default. * Note that entries older than this can easily survive * as they are reclaimed only as needed. * - mru_maxdepth ("mru maxdepth") is a hard limit on the * number of entries. * - "mru maxmem" sets mru_maxdepth to the number of entries * which fit in the given number of kilobytes. 
The default is * 1024, or 1 megabyte. * - mru_initalloc ("mru initalloc" sets the count of the * initial allocation of MRU entries. * - "mru initmem" sets mru_initalloc in units of kilobytes. * The default is 4. * - mru_incalloc ("mru incalloc" sets the number of entries to * allocate on-demand each time the free list is empty. * - "mru incmem" sets mru_incalloc in units of kilobytes. * The default is 4. * Whichever of "mru maxmem" or "mru maxdepth" occurs last in * ntp.conf controls. Similarly for "mru initalloc" and "mru * initmem", and for "mru incalloc" and "mru incmem". */ if (mru_entries < mru_mindepth) { if (NULL == mon_free) mon_getmoremem(); UNLINK_HEAD_SLIST(mon, mon_free, hash_next); } else { oldest = TAIL_DLIST(mon_mru_list, mru); oldest_age = 0; /* silence uninit warning */ if (oldest != NULL) { interval_fp = rbufp->recv_time; L_SUB(&interval_fp, &oldest->last); /* add one-half second to round up */ L_ADDUF(&interval_fp, 0x80000000); oldest_age = interval_fp.l_i; } /* note -1 is legal for mru_maxage (disables) */ if (oldest != NULL && mru_maxage < oldest_age) { mon_reclaim_entry(oldest); mon = oldest; } else if (mon_free != NULL || mru_alloc < mru_maxdepth) { if (NULL == mon_free) mon_getmoremem(); UNLINK_HEAD_SLIST(mon, mon_free, hash_next); /* Preempt from the MRU list if old enough. */ } else if (ntp_uurandom() > (double)oldest_age / mon_age) { return ~(RES_LIMITED | RES_KOD) & flags; } else { mon_reclaim_entry(oldest); mon = oldest; } } INSIST(mon != NULL); /* * Got one, initialize it */ mru_entries++; mru_peakentries = max(mru_peakentries, mru_entries); mon->last = rbufp->recv_time; mon->first = mon->last; mon->count = 1; mon->flags = ~(RES_LIMITED | RES_KOD) & flags; mon->leak = 0; memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr)); mon->vn_mode = VN_MODE(version, mode); mon->lcladr = rbufp->dstadr; mon->cast_flags = (u_char)(((rbufp->dstadr->flags & INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? 
MDF_MCAST : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST); /* * Drop him into front of the hash table. Also put him on top of * the MRU list. */ LINK_SLIST(mon_hash[hash], mon, hash_next); LINK_DLIST(mon_mru_list, mon, mru); return mon->flags; }