1ca99cfddSTim J. Robbins /*- 2ca99cfddSTim J. Robbins * Copyright (c) 2004 Tim J. Robbins. 3ca99cfddSTim J. Robbins * All rights reserved. 4ca99cfddSTim J. Robbins * 5ca99cfddSTim J. Robbins * Redistribution and use in source and binary forms, with or without 6ca99cfddSTim J. Robbins * modification, are permitted provided that the following conditions 7ca99cfddSTim J. Robbins * are met: 8ca99cfddSTim J. Robbins * 1. Redistributions of source code must retain the above copyright 9ca99cfddSTim J. Robbins * notice, this list of conditions and the following disclaimer. 10ca99cfddSTim J. Robbins * 2. Redistributions in binary form must reproduce the above copyright 11ca99cfddSTim J. Robbins * notice, this list of conditions and the following disclaimer in the 12ca99cfddSTim J. Robbins * documentation and/or other materials provided with the distribution. 13ca99cfddSTim J. Robbins * 14ca99cfddSTim J. Robbins * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15ca99cfddSTim J. Robbins * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16ca99cfddSTim J. Robbins * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17ca99cfddSTim J. Robbins * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18ca99cfddSTim J. Robbins * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19ca99cfddSTim J. Robbins * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20ca99cfddSTim J. Robbins * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21ca99cfddSTim J. Robbins * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22ca99cfddSTim J. Robbins * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23ca99cfddSTim J. Robbins * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24ca99cfddSTim J. Robbins * SUCH DAMAGE. 25ca99cfddSTim J. Robbins */ 26ca99cfddSTim J. Robbins /* 27ca99cfddSTim J. Robbins * "Set of characters" ADT implemented as a splay tree of extents, with 28ca99cfddSTim J. Robbins * a lookup table cache to simplify looking up the first bunch of 29ca99cfddSTim J. Robbins * characters (which are presumably more common than others). 30ca99cfddSTim J. Robbins */ 31ca99cfddSTim J. Robbins 32ca99cfddSTim J. Robbins #include <sys/cdefs.h> 33ca99cfddSTim J. Robbins __FBSDID("$FreeBSD$"); 34ca99cfddSTim J. Robbins 35ca99cfddSTim J. Robbins #include <assert.h> 36ca99cfddSTim J. Robbins #include <stdbool.h> 37ca99cfddSTim J. Robbins #include <stdlib.h> 38ca99cfddSTim J. Robbins #include <wchar.h> 39ca99cfddSTim J. Robbins #include <wctype.h> 40ca99cfddSTim J. Robbins #include "cset.h" 41ca99cfddSTim J. Robbins 42ca99cfddSTim J. Robbins static struct csnode * cset_delete(struct csnode *, wchar_t); 43ca99cfddSTim J. Robbins static __inline int cset_rangecmp(struct csnode *, wchar_t); 44ca99cfddSTim J. Robbins static struct csnode * cset_splay(struct csnode *, wchar_t); 45ca99cfddSTim J. Robbins 46ca99cfddSTim J. Robbins /* 47ca99cfddSTim J. Robbins * cset_alloc -- 48ca99cfddSTim J. Robbins * Allocate a set of characters. 49ca99cfddSTim J. Robbins */ 50ca99cfddSTim J. Robbins struct cset * 51ca99cfddSTim J. Robbins cset_alloc(void) 52ca99cfddSTim J. Robbins { 53ca99cfddSTim J. Robbins struct cset *cs; 54ca99cfddSTim J. Robbins 55ca99cfddSTim J. Robbins if ((cs = malloc(sizeof(*cs))) == NULL) 56ca99cfddSTim J. Robbins return (NULL); 57ca99cfddSTim J. Robbins cs->cs_root = NULL; 58ca99cfddSTim J. Robbins cs->cs_classes = NULL; 59ca99cfddSTim J. Robbins cs->cs_havecache = false; 60ca99cfddSTim J. Robbins return (cs); 61ca99cfddSTim J. Robbins } 62ca99cfddSTim J. Robbins 63ca99cfddSTim J. Robbins /* 64ca99cfddSTim J. Robbins * cset_add -- 65ca99cfddSTim J. Robbins * Add a character to the set. 66ca99cfddSTim J. Robbins */ 67ca99cfddSTim J. Robbins bool 68ca99cfddSTim J. Robbins cset_add(struct cset *cs, wchar_t ch) 69ca99cfddSTim J. Robbins { 70ca99cfddSTim J. Robbins struct csnode *csn, *ncsn; 71ca99cfddSTim J. Robbins wchar_t oval; 72ca99cfddSTim J. Robbins 73ca99cfddSTim J. Robbins cs->cs_havecache = false; 74ca99cfddSTim J. Robbins 75ca99cfddSTim J. Robbins /* 76ca99cfddSTim J. Robbins * Inserting into empty tree; new item becomes the root. 77ca99cfddSTim J. Robbins */ 78ca99cfddSTim J. Robbins if (cs->cs_root == NULL) { 79ca99cfddSTim J. Robbins csn = malloc(sizeof(*cs->cs_root)); 80ca99cfddSTim J. Robbins if (csn == NULL) 81ca99cfddSTim J. Robbins return (false); 82ca99cfddSTim J. Robbins csn->csn_left = csn->csn_right = NULL; 83ca99cfddSTim J. Robbins csn->csn_min = csn->csn_max = ch; 84ca99cfddSTim J. Robbins cs->cs_root = csn; 85ca99cfddSTim J. Robbins return (true); 86ca99cfddSTim J. Robbins } 87ca99cfddSTim J. Robbins 88ca99cfddSTim J. Robbins /* 89ca99cfddSTim J. Robbins * Splay to check whether the item already exists, and otherwise, 90ca99cfddSTim J. Robbins * where we should put it. 91ca99cfddSTim J. Robbins */ 92ca99cfddSTim J. Robbins csn = cs->cs_root = cset_splay(cs->cs_root, ch); 93ca99cfddSTim J. Robbins 94ca99cfddSTim J. Robbins /* 95ca99cfddSTim J. Robbins * Easy cases where we can avoid allocating a new node: 96ca99cfddSTim J. Robbins * (a) node already exists. 97ca99cfddSTim J. Robbins * (b) we can lower the extent's "min" to accomodate this 98ca99cfddSTim J. Robbins * character without having to coalesce. 99ca99cfddSTim J. Robbins * (c) we can raise the extent's "max" without having 100ca99cfddSTim J. Robbins * to coalesce. 101ca99cfddSTim J. Robbins */ 102ca99cfddSTim J. Robbins if (cset_rangecmp(csn, ch) == 0) 103ca99cfddSTim J. Robbins return (true); 104ca99cfddSTim J. Robbins if (ch + 1 == csn->csn_min && (csn->csn_left == NULL || 105ca99cfddSTim J. Robbins ch > csn->csn_left->csn_max + 1)) { 106ca99cfddSTim J. Robbins csn->csn_min--; 107ca99cfddSTim J. Robbins return (true); 108ca99cfddSTim J. Robbins } 109ca99cfddSTim J. Robbins if (ch == csn->csn_max + 1 && (csn->csn_right == NULL || 110ca99cfddSTim J. Robbins ch + 1 < csn->csn_right->csn_min)) { 111ca99cfddSTim J. Robbins csn->csn_max++; 112ca99cfddSTim J. Robbins return (true); 113ca99cfddSTim J. Robbins } 114ca99cfddSTim J. Robbins 115ca99cfddSTim J. Robbins /* 116ca99cfddSTim J. Robbins * Allocate a new node and link it into the tree as a direct 117ca99cfddSTim J. Robbins * child of the root. 118ca99cfddSTim J. Robbins */ 119ca99cfddSTim J. Robbins ncsn = malloc(sizeof(*ncsn)); 120ca99cfddSTim J. Robbins if (ncsn == NULL) 121ca99cfddSTim J. Robbins return (false); 122ca99cfddSTim J. Robbins ncsn->csn_min = ncsn->csn_max = ch; 123ca99cfddSTim J. Robbins if (cset_rangecmp(csn, ch) < 0) { 124ca99cfddSTim J. Robbins ncsn->csn_left = csn->csn_left; 125ca99cfddSTim J. Robbins ncsn->csn_right = csn; 126ca99cfddSTim J. Robbins csn->csn_left = NULL; 127ca99cfddSTim J. Robbins } else { 128ca99cfddSTim J. Robbins ncsn->csn_right = csn->csn_right; 129ca99cfddSTim J. Robbins ncsn->csn_left = csn; 130ca99cfddSTim J. Robbins csn->csn_right = NULL; 131ca99cfddSTim J. Robbins } 132ca99cfddSTim J. Robbins cs->cs_root = ncsn; 133ca99cfddSTim J. Robbins 134ca99cfddSTim J. Robbins /* 135ca99cfddSTim J. Robbins * Splay to bring the newly inserted node to the root, then 136ca99cfddSTim J. Robbins * coalesce with left and right neighbours if possible. 137ca99cfddSTim J. Robbins */ 138ca99cfddSTim J. Robbins csn = cs->cs_root = cset_splay(cs->cs_root, ch); 139ca99cfddSTim J. Robbins if (csn->csn_left != NULL && 140ca99cfddSTim J. Robbins csn->csn_left->csn_max + 1 == csn->csn_min) { 141ca99cfddSTim J. Robbins oval = csn->csn_left->csn_min; 142ca99cfddSTim J. Robbins cs->cs_root = cset_delete(cs->cs_root, 143ca99cfddSTim J. Robbins csn->csn_left->csn_min); 144ca99cfddSTim J. Robbins ncsn->csn_min = oval; 145ca99cfddSTim J. Robbins } 146ca99cfddSTim J. Robbins csn = cs->cs_root = cset_splay(cs->cs_root, ch); 147ca99cfddSTim J. Robbins if (csn->csn_right != NULL && 148ca99cfddSTim J. Robbins csn->csn_right->csn_min - 1 == csn->csn_max) { 149ca99cfddSTim J. Robbins oval = csn->csn_right->csn_max; 150ca99cfddSTim J. Robbins cs->cs_root = cset_delete(cs->cs_root, 151ca99cfddSTim J. Robbins csn->csn_right->csn_min); 152ca99cfddSTim J. Robbins ncsn->csn_max = oval; 153ca99cfddSTim J. Robbins } 154ca99cfddSTim J. Robbins 155ca99cfddSTim J. Robbins return (true); 156ca99cfddSTim J. Robbins } 157ca99cfddSTim J. Robbins 158ca99cfddSTim J. Robbins /* 159ca99cfddSTim J. Robbins * cset_in_hard -- 160ca99cfddSTim J. Robbins * Determine whether a character is in the set without using 161ca99cfddSTim J. Robbins * the cache. 162ca99cfddSTim J. Robbins */ 163ca99cfddSTim J. Robbins bool 164ca99cfddSTim J. Robbins cset_in_hard(struct cset *cs, wchar_t ch) 165ca99cfddSTim J. Robbins { 166ca99cfddSTim J. Robbins struct csclass *csc; 167ca99cfddSTim J. Robbins 168ca99cfddSTim J. Robbins for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) 169ca99cfddSTim J. Robbins if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0) 170ca99cfddSTim J. Robbins return (cs->cs_invert ^ true); 171ca99cfddSTim J. Robbins if (cs->cs_root != NULL) { 172ca99cfddSTim J. Robbins cs->cs_root = cset_splay(cs->cs_root, ch); 173ca99cfddSTim J. Robbins return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0); 174ca99cfddSTim J. Robbins } 175ca99cfddSTim J. Robbins return (cs->cs_invert ^ false); 176ca99cfddSTim J. Robbins } 177ca99cfddSTim J. Robbins 178ca99cfddSTim J. Robbins /* 179ca99cfddSTim J. Robbins * cset_cache -- 180ca99cfddSTim J. Robbins * Update the cache. 181ca99cfddSTim J. Robbins */ 182ca99cfddSTim J. Robbins void 183ca99cfddSTim J. Robbins cset_cache(struct cset *cs) 184ca99cfddSTim J. Robbins { 185ca99cfddSTim J. Robbins wchar_t i; 186ca99cfddSTim J. Robbins 187ca99cfddSTim J. Robbins for (i = 0; i < CS_CACHE_SIZE; i++) 188ca99cfddSTim J. Robbins cs->cs_cache[i] = cset_in_hard(cs, i); 189ca99cfddSTim J. Robbins 190ca99cfddSTim J. Robbins cs->cs_havecache = true; 191ca99cfddSTim J. Robbins } 192ca99cfddSTim J. Robbins 193ca99cfddSTim J. Robbins /* 194ca99cfddSTim J. Robbins * cset_invert -- 195ca99cfddSTim J. Robbins * Invert the character set. 196ca99cfddSTim J. Robbins */ 197ca99cfddSTim J. Robbins void 198ca99cfddSTim J. Robbins cset_invert(struct cset *cs) 199ca99cfddSTim J. Robbins { 200ca99cfddSTim J. Robbins 201ca99cfddSTim J. Robbins cs->cs_invert ^= true; 202ca99cfddSTim J. Robbins cs->cs_havecache = false; 203ca99cfddSTim J. Robbins } 204ca99cfddSTim J. Robbins 205ca99cfddSTim J. Robbins /* 206ca99cfddSTim J. Robbins * cset_addclass -- 207ca99cfddSTim J. Robbins * Add a wctype()-style character class to the set, optionally 208ca99cfddSTim J. Robbins * inverting it. 209ca99cfddSTim J. Robbins */ 210ca99cfddSTim J. Robbins bool 211ca99cfddSTim J. Robbins cset_addclass(struct cset *cs, wctype_t type, bool invert) 212ca99cfddSTim J. Robbins { 213ca99cfddSTim J. Robbins struct csclass *csc; 214ca99cfddSTim J. Robbins 215ca99cfddSTim J. Robbins csc = malloc(sizeof(*csc)); 216ca99cfddSTim J. Robbins if (csc == NULL) 217ca99cfddSTim J. Robbins return (false); 218ca99cfddSTim J. Robbins csc->csc_type = type; 219ca99cfddSTim J. Robbins csc->csc_invert = invert; 220ca99cfddSTim J. Robbins csc->csc_next = cs->cs_classes; 221ca99cfddSTim J. Robbins cs->cs_classes = csc; 222ca99cfddSTim J. Robbins cs->cs_havecache = false; 223ca99cfddSTim J. Robbins return (true); 224ca99cfddSTim J. Robbins } 225ca99cfddSTim J. Robbins 226ca99cfddSTim J. Robbins static __inline int 227ca99cfddSTim J. Robbins cset_rangecmp(struct csnode *t, wchar_t ch) 228ca99cfddSTim J. Robbins { 229ca99cfddSTim J. Robbins 230ca99cfddSTim J. Robbins if (ch < t->csn_min) 231ca99cfddSTim J. Robbins return (-1); 232ca99cfddSTim J. Robbins if (ch > t->csn_max) 233ca99cfddSTim J. Robbins return (1); 234ca99cfddSTim J. Robbins return (0); 235ca99cfddSTim J. Robbins } 236ca99cfddSTim J. Robbins 237ca99cfddSTim J. Robbins static struct csnode * 238ca99cfddSTim J. Robbins cset_splay(struct csnode *t, wchar_t ch) 239ca99cfddSTim J. Robbins { 240ca99cfddSTim J. Robbins struct csnode N, *l, *r, *y; 241ca99cfddSTim J. Robbins 242ca99cfddSTim J. Robbins /* 243ca99cfddSTim J. Robbins * Based on public domain code from Sleator. 244ca99cfddSTim J. Robbins */ 245ca99cfddSTim J. Robbins 246ca99cfddSTim J. Robbins assert(t != NULL); 247ca99cfddSTim J. Robbins 248ca99cfddSTim J. Robbins N.csn_left = N.csn_right = NULL; 249ca99cfddSTim J. Robbins l = r = &N; 250ca99cfddSTim J. Robbins for (;;) { 251ca99cfddSTim J. Robbins if (cset_rangecmp(t, ch) < 0) { 252ca99cfddSTim J. Robbins if (t->csn_left != NULL && 253ca99cfddSTim J. Robbins cset_rangecmp(t->csn_left, ch) < 0) { 254ca99cfddSTim J. Robbins y = t->csn_left; 255ca99cfddSTim J. Robbins t->csn_left = y->csn_right; 256ca99cfddSTim J. Robbins y->csn_right = t; 257ca99cfddSTim J. Robbins t = y; 258ca99cfddSTim J. Robbins } 259ca99cfddSTim J. Robbins if (t->csn_left == NULL) 260ca99cfddSTim J. Robbins break; 261ca99cfddSTim J. Robbins r->csn_left = t; 262ca99cfddSTim J. Robbins r = t; 263ca99cfddSTim J. Robbins t = t->csn_left; 264ca99cfddSTim J. Robbins } else if (cset_rangecmp(t, ch) > 0) { 265ca99cfddSTim J. Robbins if (t->csn_right != NULL && 266ca99cfddSTim J. Robbins cset_rangecmp(t->csn_right, ch) > 0) { 267ca99cfddSTim J. Robbins y = t->csn_right; 268ca99cfddSTim J. Robbins t->csn_right = y->csn_left; 269ca99cfddSTim J. Robbins y->csn_left = t; 270ca99cfddSTim J. Robbins t = y; 271ca99cfddSTim J. Robbins } 272ca99cfddSTim J. Robbins if (t->csn_right == NULL) 273ca99cfddSTim J. Robbins break; 274ca99cfddSTim J. Robbins l->csn_right = t; 275ca99cfddSTim J. Robbins l = t; 276ca99cfddSTim J. Robbins t = t->csn_right; 277ca99cfddSTim J. Robbins } else 278ca99cfddSTim J. Robbins break; 279ca99cfddSTim J. Robbins } 280ca99cfddSTim J. Robbins l->csn_right = t->csn_left; 281ca99cfddSTim J. Robbins r->csn_left = t->csn_right; 282ca99cfddSTim J. Robbins t->csn_left = N.csn_right; 283ca99cfddSTim J. Robbins t->csn_right = N.csn_left; 284ca99cfddSTim J. Robbins return (t); 285ca99cfddSTim J. Robbins } 286ca99cfddSTim J. Robbins 287ca99cfddSTim J. Robbins static struct csnode * 288ca99cfddSTim J. Robbins cset_delete(struct csnode *t, wchar_t ch) 289ca99cfddSTim J. Robbins { 290ca99cfddSTim J. Robbins struct csnode *x; 291ca99cfddSTim J. Robbins 292ca99cfddSTim J. Robbins assert(t != NULL); 293ca99cfddSTim J. Robbins t = cset_splay(t, ch); 294ca99cfddSTim J. Robbins assert(cset_rangecmp(t, ch) == 0); 295ca99cfddSTim J. Robbins if (t->csn_left == NULL) 296ca99cfddSTim J. Robbins x = t->csn_right; 297ca99cfddSTim J. Robbins else { 298ca99cfddSTim J. Robbins x = cset_splay(t->csn_left, ch); 299ca99cfddSTim J. Robbins x->csn_right = t->csn_right; 300ca99cfddSTim J. Robbins } 301ca99cfddSTim J. Robbins free(t); 302ca99cfddSTim J. Robbins return x; 303ca99cfddSTim J. Robbins } 304