/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_CALLO_H #define _SYS_CALLO_H #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #ifdef _KERNEL typedef struct callout_list callout_list_t; /* * The callout mechanism provides general-purpose event scheduling: * an arbitrary function is called in a specified amount of time. * The expiration time for a callout is kept in its callout list * structure. */ typedef struct callout { struct callout *c_idnext; /* next in ID hash, or on freelist */ struct callout *c_idprev; /* prev in ID hash */ struct callout *c_clnext; /* next in callout list */ struct callout *c_clprev; /* prev in callout list */ callout_id_t c_xid; /* extended callout ID; see below */ callout_list_t *c_list; /* callout list */ void (*c_func)(void *); /* function to call */ void *c_arg; /* argument to function */ kthread_t *c_executor; /* executing thread */ kcondvar_t c_done; /* signal callout completion */ ushort_t c_waiting; /* untimeout waiting flag */ } callout_t; /* * The callout ID (callout_id_t) uniquely identifies a callout. The callout * ID is always 64 bits internally. The lower 32 bits contain an ID value. * The upper 32 bits contain a generation number and flags. When the ID value * wraps the generation number is incremented during ID generation. This * protects callers from ID collisions that can happen as a result of the wrap. * * The kernel internal interface, timeout_generic(), always returns a * callout_id_t. But the legacy interfaces, timeout() and realtime_timeout() * return a timeout_id_t. On a 64-bit system, timeout_id_t is also 64 bits. * So, the full 64-bit ID (sans the flags) can be returned. However, on 32-bit * systems, timeout_id_t is 32 bits. So, only the lower 32 bits can be * returned. In such cases, a default generation number of 0 is assigned to * the legacy IDs. * * The lower 32-bit ID space is partitioned into two spaces - one for * short-term callouts and one for long-term. * * Here is the bit layout for the callout ID: * * 63 62 61 ... 32 31 30 29 .. X+1 X ... 1 0 * ----------------------------------------------------------------------- * | Free | Exec | Generation | Long | Counter | ID bits | Table | Type | * | | | number | term | High | | number | | * ----------------------------------------------------------------------- * * Free: * This bit indicates that this callout has been freed. This is for * debugging purposes. * * Exec(uting): * This is the executing bit which is only set in the extended callout * ID. This bit indicates that the callout handler is currently being * executed. * * Generation number: * This is the generation part of the ID. * * Long term: * This bit indicates whether this is a short-term or a long-term callout. * The long-term bit exists to address the problem of callout ID collision * on 32-bit systems. This is an issue because the system typically * generates a large number of timeout() requests, which means that callout * IDs eventually get recycled. Most timeouts are very short-lived, so that * ID recycling isn't a problem; but there are a handful of timeouts which * are sufficiently long-lived to see their own IDs reused. We use the * long-term bit to partition the ID namespace into pieces; the short-term * space gets all the heavy traffic and can wrap frequently (i.e., on the * order of a day) with no ill effects; the long-term space gets very little * traffic and thus never wraps. That said, we need to future proof callouts * in case 32-bit systems grow in size and are able to consume callout IDs * at faster rates. So, we should make all the kernel clients that use * callouts to use the internal interface so that they can use IDs outside * of the legacy space with a proper generation number. * * Counter High + ID counter bits: * These bits represent the actual ID bits in the callout ID. * The highest bit of the running counter is always set; this ensures that * the callout ID is always non-zero, thus eliminating the need for an * explicit wrap-around test during ID generation. * * Table number: * These bits carry the table number for the callout table where the callout * is queued. Each CPU has its own callout table. So, the callout tables are * numbered from 0 - (max_ncpus - 1). Because max_ncpus is different on * different systems, the actual number of table number bits will vary * accordingly. And so will the ID counter bits. * * Type: * This bit represents the callout (table) type. Each CPU has one realtime * and one normal callout table. */ #define CALLOUT_ID_FREE 0x8000000000000000ULL #define CALLOUT_EXECUTING 0x4000000000000000ULL #define CALLOUT_ID_FLAGS (CALLOUT_ID_FREE | CALLOUT_EXECUTING) #define CALLOUT_ID_MASK ~CALLOUT_ID_FLAGS #define CALLOUT_GENERATION_LOW 0x100000000ULL #define CALLOUT_LONGTERM 0x80000000 #define CALLOUT_COUNTER_HIGH 0x40000000 #define CALLOUT_TYPE_BITS 1 #define CALLOUT_NTYPES (1 << CALLOUT_TYPE_BITS) #define CALLOUT_TYPE_MASK (CALLOUT_NTYPES - 1) #define CALLOUT_COUNTER_SHIFT callout_table_bits #define CALLOUT_TABLE(t, f) (((f) << CALLOUT_TYPE_BITS) | (t)) #define CALLOUT_TABLE_NUM(ct) ((ct) - callout_table) #define CALLOUT_TABLE_SEQID(ct) (CALLOUT_TABLE_NUM(ct) >> CALLOUT_TYPE_BITS) /* * We assume that during any period of CALLOUT_LONGTERM_TICKS ticks, at most * (CALLOUT_COUNTER_HIGH / callout_counter_low) callouts will be generated. */ #define CALLOUT_LONGTERM_TICKS 0x4000UL #define CALLOUT_BUCKET_SHIFT 9 #define CALLOUT_BUCKETS (1 << CALLOUT_BUCKET_SHIFT) #define CALLOUT_BUCKET_MASK (CALLOUT_BUCKETS - 1) #define CALLOUT_HASH(x) ((x) & CALLOUT_BUCKET_MASK) #define CALLOUT_IDHASH(x) CALLOUT_HASH((x) >> CALLOUT_COUNTER_SHIFT) /* * The multiply by 0 and 1 below are cosmetic. Just to align things better * and make it more readable. The multiplications will be done at compile * time. */ #define CALLOUT_CLHASH(x) \ CALLOUT_HASH( \ ((x)>>(CALLOUT_BUCKET_SHIFT*0)) ^ \ ((x)>>(CALLOUT_BUCKET_SHIFT*1)) ^ \ ((x)>>(CALLOUT_BUCKET_SHIFT*2)) ^ \ ((x)>>(CALLOUT_BUCKET_SHIFT*3))) #define CALLOUT_ID_TO_TABLE(id) ((id) & callout_table_mask) #define CALLOUT_SHORT_ID(table) \ ((callout_id_t)(table) | CALLOUT_COUNTER_HIGH) #define CALLOUT_LONG_ID(table) \ (CALLOUT_SHORT_ID(table) | CALLOUT_LONGTERM) #define CALLOUT_THREADS 2 #define CALLOUT_REALTIME 0 /* realtime callout type */ #define CALLOUT_NORMAL 1 /* normal callout type */ /* * callout_t's are cache-aligned structures allocated from kmem caches. One kmem * cache is created per lgrp and is shared by all CPUs in that lgrp. Benefits: * - cache pages are mapped only in the TLBs of the CPUs of the lgrp * - data in cache pages is present only in those CPU caches * - memory access performance improves with locality-awareness in kmem * * The following structure is used to manage per-lgroup kmem caches. * * NOTE: Free callout_t's go to a callout table's freelist. CPUs map to callout * tables via their sequence IDs, not CPU IDs. DR operations can cause a * free list to have callouts from multiple lgrp caches. This takes away some * performance, but is no worse than if we did not use lgrp caches at all. */ typedef struct callout_cache { struct callout_cache *cc_next; /* link in the global list */ lgrp_handle_t cc_hand; /* lgroup handle */ kmem_cache_t *cc_cache; /* kmem cache pointer */ kmem_cache_t *cc_lcache; /* kmem cache pointer */ } callout_cache_t; /* * The callout hash structure is used for queueing both callouts and * callout lists. That is why the fields are declared as void *. */ typedef struct callout_hash { void *ch_head; void *ch_tail; } callout_hash_t; /* * CALLOUT_LIST_FLAG_FREE * Callout list is free. * CALLOUT_LIST_FLAG_ABSOLUTE * Callout list contains absolute timers. * CALLOUT_LIST_FLAG_HRESTIME * Callout list contains hrestime timers. * CALLOUT_LIST_FLAG_NANO * Callout list contains 1-nanosecond resolution callouts. * CALLOUT_LIST_FLAG_HEAPED * Callout list is present in the callout heap. * CALLOUT_LIST_FLAG_QUEUED * Callout list is present in the callout queue. */ #define CALLOUT_LIST_FLAG_FREE 0x1 #define CALLOUT_LIST_FLAG_ABSOLUTE 0x2 #define CALLOUT_LIST_FLAG_HRESTIME 0x4 #define CALLOUT_LIST_FLAG_NANO 0x8 #define CALLOUT_LIST_FLAG_HEAPED 0x10 #define CALLOUT_LIST_FLAG_QUEUED 0x20 struct callout_list { callout_list_t *cl_next; /* next in clhash */ callout_list_t *cl_prev; /* prev in clhash */ hrtime_t cl_expiration; /* expiration for callouts in list */ callout_hash_t cl_callouts; /* list of callouts */ int cl_flags; /* callout flags */ }; /* * Callout heap element. Each element in the heap stores the expiration * as well as the corresponding callout list. This is to avoid a lookup * of the callout list when the heap is processed. Because we store the * callout list pointer in the heap element, we have to always remove * a heap element and its callout list together. We cannot remove one * without the other. * * This structure's size must be a power of two because we want an * integral number of these to fit into a page. */ typedef struct callout_heap { hrtime_t ch_expiration; callout_list_t *ch_list; } callout_heap_t; /* * When the heap contains too many empty callout lists, it needs to be * cleaned up. The decision to clean up the heap is a function of the * number of empty entries and the heap size. Also, we don't want to * clean up small heaps. */ #define CALLOUT_MIN_REAP (CALLOUT_BUCKETS >> 3) #define CALLOUT_CLEANUP(ct) ((ct->ct_nreap >= callout_min_reap) && \ (ct->ct_nreap >= (ct->ct_heap_num >> 1))) /* * Per-callout table kstats. * * CALLOUT_TIMEOUTS * Callouts created since boot. * CALLOUT_TIMEOUTS_PENDING * Number of outstanding callouts. * CALLOUT_UNTIMEOUTS_UNEXPIRED * Number of cancelled callouts that have not expired. * CALLOUT_UNTIMEOUTS_EXECUTING * Number of cancelled callouts that were executing at the time of * cancellation. * CALLOUT_UNTIMEOUTS_EXPIRED * Number of cancelled callouts that had already expired at the time * of cancellations. * CALLOUT_EXPIRATIONS * Number of callouts that expired. * CALLOUT_ALLOCATIONS * Number of callout structures allocated. * CALLOUT_CLEANUPS * Number of times a callout table is cleaned up. */ typedef enum callout_stat_type { CALLOUT_TIMEOUTS, CALLOUT_TIMEOUTS_PENDING, CALLOUT_UNTIMEOUTS_UNEXPIRED, CALLOUT_UNTIMEOUTS_EXECUTING, CALLOUT_UNTIMEOUTS_EXPIRED, CALLOUT_EXPIRATIONS, CALLOUT_ALLOCATIONS, CALLOUT_CLEANUPS, CALLOUT_NUM_STATS } callout_stat_type_t; /* * Callout flags: * * CALLOUT_FLAG_ROUNDUP * Roundup the expiration time to the next resolution boundary. * If this flag is not specified, the expiration time is rounded down. * CALLOUT_FLAG_ABSOLUTE * Normally, the expiration passed to the timeout API functions is an * expiration interval. If this flag is specified, then it is * interpreted as the expiration time itself. * CALLOUT_FLAG_HRESTIME * Normally, callouts are not affected by changes to system time * (hrestime). This flag is used to create a callout that is affected * by system time. If system time changes, these timers must be * handled in a special way (see callout.c). These are used by condition * variables and LWP timers that need this behavior. * CALLOUT_FLAG_32BIT * Legacy interfaces timeout() and realtime_timeout() pass this flag * to timeout_generic() to indicate that a 32-bit ID should be allocated. */ #define CALLOUT_FLAG_ROUNDUP 0x1 #define CALLOUT_FLAG_ABSOLUTE 0x2 #define CALLOUT_FLAG_HRESTIME 0x4 #define CALLOUT_FLAG_32BIT 0x8 /* * On 32-bit systems, the legacy interfaces, timeout() and realtime_timeout(), * must pass CALLOUT_FLAG_32BIT to timeout_generic() so that a 32-bit ID * can be generated. */ #ifdef _LP64 #define CALLOUT_LEGACY 0 #else #define CALLOUT_LEGACY CALLOUT_FLAG_32BIT #endif /* * All of the state information associated with a callout table. * The fields are ordered with cache performance in mind. */ typedef struct callout_table { kmutex_t ct_mutex; /* protects all callout state */ callout_t *ct_free; /* free callout structures */ callout_list_t *ct_lfree; /* free callout list structures */ callout_id_t ct_short_id; /* most recently issued short-term ID */ callout_id_t ct_long_id; /* most recently issued long-term ID */ callout_hash_t *ct_idhash; /* ID hash chains */ callout_hash_t *ct_clhash; /* callout list hash */ kstat_named_t *ct_kstat_data; /* callout kstat data */ uint_t ct_type; /* callout table type */ uint_t ct_suspend; /* suspend count */ cyclic_id_t ct_cyclic; /* cyclic for this table */ callout_heap_t *ct_heap; /* callout expiration heap */ ulong_t ct_heap_num; /* occupied slots in the heap */ ulong_t ct_heap_max; /* end of the heap */ kmem_cache_t *ct_cache; /* callout kmem cache */ kmem_cache_t *ct_lcache; /* callout list kmem cache */ callout_id_t ct_gen_id; /* generation based ID */ callout_hash_t ct_expired; /* list of expired callout lists */ taskq_t *ct_taskq; /* taskq to execute normal callouts */ kstat_t *ct_kstats; /* callout kstats */ int ct_nreap; /* # heap entries that need reaping */ cyclic_id_t ct_qcyclic; /* cyclic for the callout queue */ callout_hash_t ct_queue; /* overflow queue of callouts */ #ifndef _LP64 char ct_pad[12]; /* cache alignment */ #endif /* * This structure should be aligned to a 64-byte (cache-line) * boundary. Make sure the padding is right for 32-bit as well * as 64-bit kernels. */ } callout_table_t; /* * Short hand definitions for the callout kstats. */ #define ct_timeouts \ ct_kstat_data[CALLOUT_TIMEOUTS].value.ui64 #define ct_timeouts_pending \ ct_kstat_data[CALLOUT_TIMEOUTS_PENDING].value.ui64 #define ct_untimeouts_unexpired \ ct_kstat_data[CALLOUT_UNTIMEOUTS_UNEXPIRED].value.ui64 #define ct_untimeouts_executing \ ct_kstat_data[CALLOUT_UNTIMEOUTS_EXECUTING].value.ui64 #define ct_untimeouts_expired \ ct_kstat_data[CALLOUT_UNTIMEOUTS_EXPIRED].value.ui64 #define ct_expirations \ ct_kstat_data[CALLOUT_EXPIRATIONS].value.ui64 #define ct_allocations \ ct_kstat_data[CALLOUT_ALLOCATIONS].value.ui64 #define ct_cleanups \ ct_kstat_data[CALLOUT_CLEANUPS].value.ui64 /* * CALLOUT_CHUNK is the minimum initial size of each heap, and the amount * by which a full heap is expanded to make room for new entries. */ #define CALLOUT_CHUNK (PAGESIZE / sizeof (callout_heap_t)) /* * CALLOUT_MIN_HEAP_SIZE defines the minimum size for the callout heap for * the whole system. */ #define CALLOUT_MIN_HEAP_SIZE (64 * 1024 * sizeof (callout_heap_t)) /* * CALLOUT_MEM_FRACTION defines the fraction of available physical memory that * can be allocated towards the callout heap for the whole system. */ #define CALLOUT_MEM_FRACTION 4096 #define CALLOUT_HEAP_PARENT(index) (((index) - 1) >> 1) #define CALLOUT_HEAP_RIGHT(index) (((index) + 1) << 1) #define CALLOUT_HEAP_LEFT(index) ((((index) + 1) << 1) - 1) #define CALLOUT_TCP_RESOLUTION 10000000ULL #define CALLOUT_ALIGN 64 /* cache line size */ #ifdef _LP64 #define CALLOUT_MAX_TICKS NSEC_TO_TICK(CY_INFINITY); #else #define CALLOUT_MAX_TICKS LONG_MAX #endif #define CALLOUT_TOLERANCE 200000 /* nanoseconds */ extern void callout_init(void); extern void membar_sync(void); extern void callout_cpu_online(cpu_t *); extern void callout_cpu_offline(cpu_t *); extern void callout_hrestime(void); #endif #ifdef __cplusplus } #endif #endif /* _SYS_CALLO_H */