1ca99cfddSTim J. Robbins /*- 2ca99cfddSTim J. Robbins * Copyright (c) 2004 Tim J. Robbins. 3ca99cfddSTim J. Robbins * All rights reserved. 4ca99cfddSTim J. Robbins * 5ca99cfddSTim J. Robbins * Redistribution and use in source and binary forms, with or without 6ca99cfddSTim J. Robbins * modification, are permitted provided that the following conditions 7ca99cfddSTim J. Robbins * are met: 8ca99cfddSTim J. Robbins * 1. Redistributions of source code must retain the above copyright 9ca99cfddSTim J. Robbins * notice, this list of conditions and the following disclaimer. 10ca99cfddSTim J. Robbins * 2. Redistributions in binary form must reproduce the above copyright 11ca99cfddSTim J. Robbins * notice, this list of conditions and the following disclaimer in the 12ca99cfddSTim J. Robbins * documentation and/or other materials provided with the distribution. 13ca99cfddSTim J. Robbins * 14ca99cfddSTim J. Robbins * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15ca99cfddSTim J. Robbins * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16ca99cfddSTim J. Robbins * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17ca99cfddSTim J. Robbins * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18ca99cfddSTim J. Robbins * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19ca99cfddSTim J. Robbins * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20ca99cfddSTim J. Robbins * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21ca99cfddSTim J. Robbins * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22ca99cfddSTim J. Robbins * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23ca99cfddSTim J. Robbins * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24ca99cfddSTim J. Robbins * SUCH DAMAGE. 25ca99cfddSTim J. Robbins */ 26ca99cfddSTim J. Robbins /* 27ca99cfddSTim J. Robbins * "Set of characters" ADT implemented as a splay tree of extents, with 28ca99cfddSTim J. Robbins * a lookup table cache to simplify looking up the first bunch of 29ca99cfddSTim J. Robbins * characters (which are presumably more common than others). 30ca99cfddSTim J. Robbins */ 31ca99cfddSTim J. Robbins 32ca99cfddSTim J. Robbins #include <sys/cdefs.h> 33ca99cfddSTim J. Robbins __FBSDID("$FreeBSD$"); 34ca99cfddSTim J. Robbins 35ca99cfddSTim J. Robbins #include <assert.h> 36ca99cfddSTim J. Robbins #include <stdbool.h> 37ca99cfddSTim J. Robbins #include <stdlib.h> 38ca99cfddSTim J. Robbins #include <wchar.h> 39ca99cfddSTim J. Robbins #include <wctype.h> 40ca99cfddSTim J. Robbins #include "cset.h" 41ca99cfddSTim J. Robbins 42ca99cfddSTim J. Robbins static struct csnode * cset_delete(struct csnode *, wchar_t); 43ca99cfddSTim J. Robbins static __inline int cset_rangecmp(struct csnode *, wchar_t); 44ca99cfddSTim J. Robbins static struct csnode * cset_splay(struct csnode *, wchar_t); 45ca99cfddSTim J. Robbins 46ca99cfddSTim J. Robbins /* 47ca99cfddSTim J. Robbins * cset_alloc -- 48ca99cfddSTim J. Robbins * Allocate a set of characters. 49ca99cfddSTim J. Robbins */ 50ca99cfddSTim J. Robbins struct cset * 51ca99cfddSTim J. Robbins cset_alloc(void) 52ca99cfddSTim J. Robbins { 53ca99cfddSTim J. Robbins struct cset *cs; 54ca99cfddSTim J. Robbins 55ca99cfddSTim J. Robbins if ((cs = malloc(sizeof(*cs))) == NULL) 56ca99cfddSTim J. Robbins return (NULL); 57ca99cfddSTim J. Robbins cs->cs_root = NULL; 58ca99cfddSTim J. Robbins cs->cs_classes = NULL; 59ca99cfddSTim J. Robbins cs->cs_havecache = false; 609c8fd487STim J. Robbins cs->cs_invert = false; 61ca99cfddSTim J. Robbins return (cs); 62ca99cfddSTim J. Robbins } 63ca99cfddSTim J. Robbins 64ca99cfddSTim J. Robbins /* 65ca99cfddSTim J. Robbins * cset_add -- 66ca99cfddSTim J. Robbins * Add a character to the set. 67ca99cfddSTim J. Robbins */ 68ca99cfddSTim J. Robbins bool 69ca99cfddSTim J. Robbins cset_add(struct cset *cs, wchar_t ch) 70ca99cfddSTim J. Robbins { 71ca99cfddSTim J. Robbins struct csnode *csn, *ncsn; 72ca99cfddSTim J. Robbins wchar_t oval; 73ca99cfddSTim J. Robbins 74ca99cfddSTim J. Robbins cs->cs_havecache = false; 75ca99cfddSTim J. Robbins 76ca99cfddSTim J. Robbins /* 77ca99cfddSTim J. Robbins * Inserting into empty tree; new item becomes the root. 78ca99cfddSTim J. Robbins */ 79ca99cfddSTim J. Robbins if (cs->cs_root == NULL) { 80ca99cfddSTim J. Robbins csn = malloc(sizeof(*cs->cs_root)); 81ca99cfddSTim J. Robbins if (csn == NULL) 82ca99cfddSTim J. Robbins return (false); 83ca99cfddSTim J. Robbins csn->csn_left = csn->csn_right = NULL; 84ca99cfddSTim J. Robbins csn->csn_min = csn->csn_max = ch; 85ca99cfddSTim J. Robbins cs->cs_root = csn; 86ca99cfddSTim J. Robbins return (true); 87ca99cfddSTim J. Robbins } 88ca99cfddSTim J. Robbins 89ca99cfddSTim J. Robbins /* 90ca99cfddSTim J. Robbins * Splay to check whether the item already exists, and otherwise, 91ca99cfddSTim J. Robbins * where we should put it. 92ca99cfddSTim J. Robbins */ 93ca99cfddSTim J. Robbins csn = cs->cs_root = cset_splay(cs->cs_root, ch); 94ca99cfddSTim J. Robbins 95ca99cfddSTim J. Robbins /* 96cfab3bddSTim J. Robbins * Avoid adding duplicate nodes. 97ca99cfddSTim J. Robbins */ 98ca99cfddSTim J. Robbins if (cset_rangecmp(csn, ch) == 0) 99ca99cfddSTim J. Robbins return (true); 100ca99cfddSTim J. Robbins 101ca99cfddSTim J. Robbins /* 102cfab3bddSTim J. Robbins * Allocate a new node and make it the new root. 103ca99cfddSTim J. Robbins */ 104ca99cfddSTim J. Robbins ncsn = malloc(sizeof(*ncsn)); 105ca99cfddSTim J. Robbins if (ncsn == NULL) 106ca99cfddSTim J. Robbins return (false); 107ca99cfddSTim J. Robbins ncsn->csn_min = ncsn->csn_max = ch; 108ca99cfddSTim J. Robbins if (cset_rangecmp(csn, ch) < 0) { 109ca99cfddSTim J. Robbins ncsn->csn_left = csn->csn_left; 110ca99cfddSTim J. Robbins ncsn->csn_right = csn; 111ca99cfddSTim J. Robbins csn->csn_left = NULL; 112ca99cfddSTim J. Robbins } else { 113ca99cfddSTim J. Robbins ncsn->csn_right = csn->csn_right; 114ca99cfddSTim J. Robbins ncsn->csn_left = csn; 115ca99cfddSTim J. Robbins csn->csn_right = NULL; 116ca99cfddSTim J. Robbins } 117ca99cfddSTim J. Robbins cs->cs_root = ncsn; 118ca99cfddSTim J. Robbins 119ca99cfddSTim J. Robbins /* 120cfab3bddSTim J. Robbins * Coalesce with left and right neighbours if possible. 121ca99cfddSTim J. Robbins */ 122cfab3bddSTim J. Robbins if (ncsn->csn_left != NULL) { 123cfab3bddSTim J. Robbins ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); 124cfab3bddSTim J. Robbins if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { 125cfab3bddSTim J. Robbins oval = ncsn->csn_left->csn_min; 126cfab3bddSTim J. Robbins ncsn->csn_left = cset_delete(ncsn->csn_left, 127cfab3bddSTim J. Robbins ncsn->csn_left->csn_min); 128ca99cfddSTim J. Robbins ncsn->csn_min = oval; 129ca99cfddSTim J. Robbins } 130cfab3bddSTim J. Robbins } 131cfab3bddSTim J. Robbins if (ncsn->csn_right != NULL) { 132cfab3bddSTim J. Robbins ncsn->csn_right = cset_splay(ncsn->csn_right, 133cfab3bddSTim J. Robbins ncsn->csn_max + 1); 134cfab3bddSTim J. Robbins if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { 135cfab3bddSTim J. Robbins oval = ncsn->csn_right->csn_max; 136cfab3bddSTim J. Robbins ncsn->csn_right = cset_delete(ncsn->csn_right, 137cfab3bddSTim J. Robbins ncsn->csn_right->csn_min); 138ca99cfddSTim J. Robbins ncsn->csn_max = oval; 139ca99cfddSTim J. Robbins } 140cfab3bddSTim J. Robbins } 141ca99cfddSTim J. Robbins 142ca99cfddSTim J. Robbins return (true); 143ca99cfddSTim J. Robbins } 144ca99cfddSTim J. Robbins 145ca99cfddSTim J. Robbins /* 146ca99cfddSTim J. Robbins * cset_in_hard -- 147ca99cfddSTim J. Robbins * Determine whether a character is in the set without using 148ca99cfddSTim J. Robbins * the cache. 149ca99cfddSTim J. Robbins */ 150ca99cfddSTim J. Robbins bool 151ca99cfddSTim J. Robbins cset_in_hard(struct cset *cs, wchar_t ch) 152ca99cfddSTim J. Robbins { 153ca99cfddSTim J. Robbins struct csclass *csc; 154ca99cfddSTim J. Robbins 155ca99cfddSTim J. Robbins for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) 156ca99cfddSTim J. Robbins if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0) 157ca99cfddSTim J. Robbins return (cs->cs_invert ^ true); 158ca99cfddSTim J. Robbins if (cs->cs_root != NULL) { 159ca99cfddSTim J. Robbins cs->cs_root = cset_splay(cs->cs_root, ch); 160ca99cfddSTim J. Robbins return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0); 161ca99cfddSTim J. Robbins } 162ca99cfddSTim J. Robbins return (cs->cs_invert ^ false); 163ca99cfddSTim J. Robbins } 164ca99cfddSTim J. Robbins 165ca99cfddSTim J. Robbins /* 166ca99cfddSTim J. Robbins * cset_cache -- 167ca99cfddSTim J. Robbins * Update the cache. 168ca99cfddSTim J. Robbins */ 169ca99cfddSTim J. Robbins void 170ca99cfddSTim J. Robbins cset_cache(struct cset *cs) 171ca99cfddSTim J. Robbins { 172ca99cfddSTim J. Robbins wchar_t i; 173ca99cfddSTim J. Robbins 174ca99cfddSTim J. Robbins for (i = 0; i < CS_CACHE_SIZE; i++) 175ca99cfddSTim J. Robbins cs->cs_cache[i] = cset_in_hard(cs, i); 176ca99cfddSTim J. Robbins 177ca99cfddSTim J. Robbins cs->cs_havecache = true; 178ca99cfddSTim J. Robbins } 179ca99cfddSTim J. Robbins 180ca99cfddSTim J. Robbins /* 181ca99cfddSTim J. Robbins * cset_invert -- 182ca99cfddSTim J. Robbins * Invert the character set. 183ca99cfddSTim J. Robbins */ 184ca99cfddSTim J. Robbins void 185ca99cfddSTim J. Robbins cset_invert(struct cset *cs) 186ca99cfddSTim J. Robbins { 187ca99cfddSTim J. Robbins 188ca99cfddSTim J. Robbins cs->cs_invert ^= true; 189ca99cfddSTim J. Robbins cs->cs_havecache = false; 190ca99cfddSTim J. Robbins } 191ca99cfddSTim J. Robbins 192ca99cfddSTim J. Robbins /* 193ca99cfddSTim J. Robbins * cset_addclass -- 194ca99cfddSTim J. Robbins * Add a wctype()-style character class to the set, optionally 195ca99cfddSTim J. Robbins * inverting it. 196ca99cfddSTim J. Robbins */ 197ca99cfddSTim J. Robbins bool 198ca99cfddSTim J. Robbins cset_addclass(struct cset *cs, wctype_t type, bool invert) 199ca99cfddSTim J. Robbins { 200ca99cfddSTim J. Robbins struct csclass *csc; 201ca99cfddSTim J. Robbins 202ca99cfddSTim J. Robbins csc = malloc(sizeof(*csc)); 203ca99cfddSTim J. Robbins if (csc == NULL) 204ca99cfddSTim J. Robbins return (false); 205ca99cfddSTim J. Robbins csc->csc_type = type; 206ca99cfddSTim J. Robbins csc->csc_invert = invert; 207ca99cfddSTim J. Robbins csc->csc_next = cs->cs_classes; 208ca99cfddSTim J. Robbins cs->cs_classes = csc; 209ca99cfddSTim J. Robbins cs->cs_havecache = false; 210ca99cfddSTim J. Robbins return (true); 211ca99cfddSTim J. Robbins } 212ca99cfddSTim J. Robbins 213ca99cfddSTim J. Robbins static __inline int 214ca99cfddSTim J. Robbins cset_rangecmp(struct csnode *t, wchar_t ch) 215ca99cfddSTim J. Robbins { 216ca99cfddSTim J. Robbins 217ca99cfddSTim J. Robbins if (ch < t->csn_min) 218ca99cfddSTim J. Robbins return (-1); 219ca99cfddSTim J. Robbins if (ch > t->csn_max) 220ca99cfddSTim J. Robbins return (1); 221ca99cfddSTim J. Robbins return (0); 222ca99cfddSTim J. Robbins } 223ca99cfddSTim J. Robbins 224ca99cfddSTim J. Robbins static struct csnode * 225ca99cfddSTim J. Robbins cset_splay(struct csnode *t, wchar_t ch) 226ca99cfddSTim J. Robbins { 227ca99cfddSTim J. Robbins struct csnode N, *l, *r, *y; 228ca99cfddSTim J. Robbins 229ca99cfddSTim J. Robbins /* 230ca99cfddSTim J. Robbins * Based on public domain code from Sleator. 231ca99cfddSTim J. Robbins */ 232ca99cfddSTim J. Robbins 233ca99cfddSTim J. Robbins assert(t != NULL); 234ca99cfddSTim J. Robbins 235ca99cfddSTim J. Robbins N.csn_left = N.csn_right = NULL; 236ca99cfddSTim J. Robbins l = r = &N; 237ca99cfddSTim J. Robbins for (;;) { 238ca99cfddSTim J. Robbins if (cset_rangecmp(t, ch) < 0) { 239ca99cfddSTim J. Robbins if (t->csn_left != NULL && 240ca99cfddSTim J. Robbins cset_rangecmp(t->csn_left, ch) < 0) { 241ca99cfddSTim J. Robbins y = t->csn_left; 242ca99cfddSTim J. Robbins t->csn_left = y->csn_right; 243ca99cfddSTim J. Robbins y->csn_right = t; 244ca99cfddSTim J. Robbins t = y; 245ca99cfddSTim J. Robbins } 246ca99cfddSTim J. Robbins if (t->csn_left == NULL) 247ca99cfddSTim J. Robbins break; 248ca99cfddSTim J. Robbins r->csn_left = t; 249ca99cfddSTim J. Robbins r = t; 250ca99cfddSTim J. Robbins t = t->csn_left; 251ca99cfddSTim J. Robbins } else if (cset_rangecmp(t, ch) > 0) { 252ca99cfddSTim J. Robbins if (t->csn_right != NULL && 253ca99cfddSTim J. Robbins cset_rangecmp(t->csn_right, ch) > 0) { 254ca99cfddSTim J. Robbins y = t->csn_right; 255ca99cfddSTim J. Robbins t->csn_right = y->csn_left; 256ca99cfddSTim J. Robbins y->csn_left = t; 257ca99cfddSTim J. Robbins t = y; 258ca99cfddSTim J. Robbins } 259ca99cfddSTim J. Robbins if (t->csn_right == NULL) 260ca99cfddSTim J. Robbins break; 261ca99cfddSTim J. Robbins l->csn_right = t; 262ca99cfddSTim J. Robbins l = t; 263ca99cfddSTim J. Robbins t = t->csn_right; 264ca99cfddSTim J. Robbins } else 265ca99cfddSTim J. Robbins break; 266ca99cfddSTim J. Robbins } 267ca99cfddSTim J. Robbins l->csn_right = t->csn_left; 268ca99cfddSTim J. Robbins r->csn_left = t->csn_right; 269ca99cfddSTim J. Robbins t->csn_left = N.csn_right; 270ca99cfddSTim J. Robbins t->csn_right = N.csn_left; 271ca99cfddSTim J. Robbins return (t); 272ca99cfddSTim J. Robbins } 273ca99cfddSTim J. Robbins 274ca99cfddSTim J. Robbins static struct csnode * 275ca99cfddSTim J. Robbins cset_delete(struct csnode *t, wchar_t ch) 276ca99cfddSTim J. Robbins { 277ca99cfddSTim J. Robbins struct csnode *x; 278ca99cfddSTim J. Robbins 279ca99cfddSTim J. Robbins assert(t != NULL); 280ca99cfddSTim J. Robbins t = cset_splay(t, ch); 281ca99cfddSTim J. Robbins assert(cset_rangecmp(t, ch) == 0); 282ca99cfddSTim J. Robbins if (t->csn_left == NULL) 283ca99cfddSTim J. Robbins x = t->csn_right; 284ca99cfddSTim J. Robbins else { 285ca99cfddSTim J. Robbins x = cset_splay(t->csn_left, ch); 286ca99cfddSTim J. Robbins x->csn_right = t->csn_right; 287ca99cfddSTim J. Robbins } 288ca99cfddSTim J. Robbins free(t); 289ca99cfddSTim J. Robbins return x; 290ca99cfddSTim J. Robbins } 291