1*8e3e3a7aSWarner Losh /* 2*8e3e3a7aSWarner Losh ** $Id: lstring.c,v 2.56 2015/11/23 11:32:51 roberto Exp $ 3*8e3e3a7aSWarner Losh ** String table (keeps all strings handled by Lua) 4*8e3e3a7aSWarner Losh ** See Copyright Notice in lua.h 5*8e3e3a7aSWarner Losh */ 6*8e3e3a7aSWarner Losh 7*8e3e3a7aSWarner Losh #define lstring_c 8*8e3e3a7aSWarner Losh #define LUA_CORE 9*8e3e3a7aSWarner Losh 10*8e3e3a7aSWarner Losh #include "lprefix.h" 11*8e3e3a7aSWarner Losh 12*8e3e3a7aSWarner Losh 13*8e3e3a7aSWarner Losh #include <string.h> 14*8e3e3a7aSWarner Losh 15*8e3e3a7aSWarner Losh #include "lua.h" 16*8e3e3a7aSWarner Losh 17*8e3e3a7aSWarner Losh #include "ldebug.h" 18*8e3e3a7aSWarner Losh #include "ldo.h" 19*8e3e3a7aSWarner Losh #include "lmem.h" 20*8e3e3a7aSWarner Losh #include "lobject.h" 21*8e3e3a7aSWarner Losh #include "lstate.h" 22*8e3e3a7aSWarner Losh #include "lstring.h" 23*8e3e3a7aSWarner Losh 24*8e3e3a7aSWarner Losh 25*8e3e3a7aSWarner Losh #define MEMERRMSG "not enough memory" 26*8e3e3a7aSWarner Losh 27*8e3e3a7aSWarner Losh 28*8e3e3a7aSWarner Losh /* 29*8e3e3a7aSWarner Losh ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to 30*8e3e3a7aSWarner Losh ** compute its hash 31*8e3e3a7aSWarner Losh */ 32*8e3e3a7aSWarner Losh #if !defined(LUAI_HASHLIMIT) 33*8e3e3a7aSWarner Losh #define LUAI_HASHLIMIT 5 34*8e3e3a7aSWarner Losh #endif 35*8e3e3a7aSWarner Losh 36*8e3e3a7aSWarner Losh 37*8e3e3a7aSWarner Losh /* 38*8e3e3a7aSWarner Losh ** equality for long strings 39*8e3e3a7aSWarner Losh */ 40*8e3e3a7aSWarner Losh int luaS_eqlngstr (TString *a, TString *b) { 41*8e3e3a7aSWarner Losh size_t len = a->u.lnglen; 42*8e3e3a7aSWarner Losh lua_assert(a->tt == LUA_TLNGSTR && b->tt == LUA_TLNGSTR); 43*8e3e3a7aSWarner Losh return (a == b) || /* same instance or... */ 44*8e3e3a7aSWarner Losh ((len == b->u.lnglen) && /* equal length and ... */ 45*8e3e3a7aSWarner Losh (memcmp(getstr(a), getstr(b), len) == 0)); /* equal contents */ 46*8e3e3a7aSWarner Losh } 47*8e3e3a7aSWarner Losh 48*8e3e3a7aSWarner Losh 49*8e3e3a7aSWarner Losh unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { 50*8e3e3a7aSWarner Losh unsigned int h = seed ^ cast(unsigned int, l); 51*8e3e3a7aSWarner Losh size_t step = (l >> LUAI_HASHLIMIT) + 1; 52*8e3e3a7aSWarner Losh for (; l >= step; l -= step) 53*8e3e3a7aSWarner Losh h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1])); 54*8e3e3a7aSWarner Losh return h; 55*8e3e3a7aSWarner Losh } 56*8e3e3a7aSWarner Losh 57*8e3e3a7aSWarner Losh 58*8e3e3a7aSWarner Losh unsigned int luaS_hashlongstr (TString *ts) { 59*8e3e3a7aSWarner Losh lua_assert(ts->tt == LUA_TLNGSTR); 60*8e3e3a7aSWarner Losh if (ts->extra == 0) { /* no hash? */ 61*8e3e3a7aSWarner Losh ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash); 62*8e3e3a7aSWarner Losh ts->extra = 1; /* now it has its hash */ 63*8e3e3a7aSWarner Losh } 64*8e3e3a7aSWarner Losh return ts->hash; 65*8e3e3a7aSWarner Losh } 66*8e3e3a7aSWarner Losh 67*8e3e3a7aSWarner Losh 68*8e3e3a7aSWarner Losh /* 69*8e3e3a7aSWarner Losh ** resizes the string table 70*8e3e3a7aSWarner Losh */ 71*8e3e3a7aSWarner Losh void luaS_resize (lua_State *L, int newsize) { 72*8e3e3a7aSWarner Losh int i; 73*8e3e3a7aSWarner Losh stringtable *tb = &G(L)->strt; 74*8e3e3a7aSWarner Losh if (newsize > tb->size) { /* grow table if needed */ 75*8e3e3a7aSWarner Losh luaM_reallocvector(L, tb->hash, tb->size, newsize, TString *); 76*8e3e3a7aSWarner Losh for (i = tb->size; i < newsize; i++) 77*8e3e3a7aSWarner Losh tb->hash[i] = NULL; 78*8e3e3a7aSWarner Losh } 79*8e3e3a7aSWarner Losh for (i = 0; i < tb->size; i++) { /* rehash */ 80*8e3e3a7aSWarner Losh TString *p = tb->hash[i]; 81*8e3e3a7aSWarner Losh tb->hash[i] = NULL; 82*8e3e3a7aSWarner Losh while (p) { /* for each node in the list */ 83*8e3e3a7aSWarner Losh TString *hnext = p->u.hnext; /* save next */ 84*8e3e3a7aSWarner Losh unsigned int h = lmod(p->hash, newsize); /* new position */ 85*8e3e3a7aSWarner Losh p->u.hnext = tb->hash[h]; /* chain it */ 86*8e3e3a7aSWarner Losh tb->hash[h] = p; 87*8e3e3a7aSWarner Losh p = hnext; 88*8e3e3a7aSWarner Losh } 89*8e3e3a7aSWarner Losh } 90*8e3e3a7aSWarner Losh if (newsize < tb->size) { /* shrink table if needed */ 91*8e3e3a7aSWarner Losh /* vanishing slice should be empty */ 92*8e3e3a7aSWarner Losh lua_assert(tb->hash[newsize] == NULL && tb->hash[tb->size - 1] == NULL); 93*8e3e3a7aSWarner Losh luaM_reallocvector(L, tb->hash, tb->size, newsize, TString *); 94*8e3e3a7aSWarner Losh } 95*8e3e3a7aSWarner Losh tb->size = newsize; 96*8e3e3a7aSWarner Losh } 97*8e3e3a7aSWarner Losh 98*8e3e3a7aSWarner Losh 99*8e3e3a7aSWarner Losh /* 100*8e3e3a7aSWarner Losh ** Clear API string cache. (Entries cannot be empty, so fill them with 101*8e3e3a7aSWarner Losh ** a non-collectable string.) 102*8e3e3a7aSWarner Losh */ 103*8e3e3a7aSWarner Losh void luaS_clearcache (global_State *g) { 104*8e3e3a7aSWarner Losh int i, j; 105*8e3e3a7aSWarner Losh for (i = 0; i < STRCACHE_N; i++) 106*8e3e3a7aSWarner Losh for (j = 0; j < STRCACHE_M; j++) { 107*8e3e3a7aSWarner Losh if (iswhite(g->strcache[i][j])) /* will entry be collected? */ 108*8e3e3a7aSWarner Losh g->strcache[i][j] = g->memerrmsg; /* replace it with something fixed */ 109*8e3e3a7aSWarner Losh } 110*8e3e3a7aSWarner Losh } 111*8e3e3a7aSWarner Losh 112*8e3e3a7aSWarner Losh 113*8e3e3a7aSWarner Losh /* 114*8e3e3a7aSWarner Losh ** Initialize the string table and the string cache 115*8e3e3a7aSWarner Losh */ 116*8e3e3a7aSWarner Losh void luaS_init (lua_State *L) { 117*8e3e3a7aSWarner Losh global_State *g = G(L); 118*8e3e3a7aSWarner Losh int i, j; 119*8e3e3a7aSWarner Losh luaS_resize(L, MINSTRTABSIZE); /* initial size of string table */ 120*8e3e3a7aSWarner Losh /* pre-create memory-error message */ 121*8e3e3a7aSWarner Losh g->memerrmsg = luaS_newliteral(L, MEMERRMSG); 122*8e3e3a7aSWarner Losh luaC_fix(L, obj2gco(g->memerrmsg)); /* it should never be collected */ 123*8e3e3a7aSWarner Losh for (i = 0; i < STRCACHE_N; i++) /* fill cache with valid strings */ 124*8e3e3a7aSWarner Losh for (j = 0; j < STRCACHE_M; j++) 125*8e3e3a7aSWarner Losh g->strcache[i][j] = g->memerrmsg; 126*8e3e3a7aSWarner Losh } 127*8e3e3a7aSWarner Losh 128*8e3e3a7aSWarner Losh 129*8e3e3a7aSWarner Losh 130*8e3e3a7aSWarner Losh /* 131*8e3e3a7aSWarner Losh ** creates a new string object 132*8e3e3a7aSWarner Losh */ 133*8e3e3a7aSWarner Losh static TString *createstrobj (lua_State *L, size_t l, int tag, unsigned int h) { 134*8e3e3a7aSWarner Losh TString *ts; 135*8e3e3a7aSWarner Losh GCObject *o; 136*8e3e3a7aSWarner Losh size_t totalsize; /* total size of TString object */ 137*8e3e3a7aSWarner Losh totalsize = sizelstring(l); 138*8e3e3a7aSWarner Losh o = luaC_newobj(L, tag, totalsize); 139*8e3e3a7aSWarner Losh ts = gco2ts(o); 140*8e3e3a7aSWarner Losh ts->hash = h; 141*8e3e3a7aSWarner Losh ts->extra = 0; 142*8e3e3a7aSWarner Losh getstr(ts)[l] = '\0'; /* ending 0 */ 143*8e3e3a7aSWarner Losh return ts; 144*8e3e3a7aSWarner Losh } 145*8e3e3a7aSWarner Losh 146*8e3e3a7aSWarner Losh 147*8e3e3a7aSWarner Losh TString *luaS_createlngstrobj (lua_State *L, size_t l) { 148*8e3e3a7aSWarner Losh TString *ts = createstrobj(L, l, LUA_TLNGSTR, G(L)->seed); 149*8e3e3a7aSWarner Losh ts->u.lnglen = l; 150*8e3e3a7aSWarner Losh return ts; 151*8e3e3a7aSWarner Losh } 152*8e3e3a7aSWarner Losh 153*8e3e3a7aSWarner Losh 154*8e3e3a7aSWarner Losh void luaS_remove (lua_State *L, TString *ts) { 155*8e3e3a7aSWarner Losh stringtable *tb = &G(L)->strt; 156*8e3e3a7aSWarner Losh TString **p = &tb->hash[lmod(ts->hash, tb->size)]; 157*8e3e3a7aSWarner Losh while (*p != ts) /* find previous element */ 158*8e3e3a7aSWarner Losh p = &(*p)->u.hnext; 159*8e3e3a7aSWarner Losh *p = (*p)->u.hnext; /* remove element from its list */ 160*8e3e3a7aSWarner Losh tb->nuse--; 161*8e3e3a7aSWarner Losh } 162*8e3e3a7aSWarner Losh 163*8e3e3a7aSWarner Losh 164*8e3e3a7aSWarner Losh /* 165*8e3e3a7aSWarner Losh ** checks whether short string exists and reuses it or creates a new one 166*8e3e3a7aSWarner Losh */ 167*8e3e3a7aSWarner Losh static TString *internshrstr (lua_State *L, const char *str, size_t l) { 168*8e3e3a7aSWarner Losh TString *ts; 169*8e3e3a7aSWarner Losh global_State *g = G(L); 170*8e3e3a7aSWarner Losh unsigned int h = luaS_hash(str, l, g->seed); 171*8e3e3a7aSWarner Losh TString **list = &g->strt.hash[lmod(h, g->strt.size)]; 172*8e3e3a7aSWarner Losh lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ 173*8e3e3a7aSWarner Losh for (ts = *list; ts != NULL; ts = ts->u.hnext) { 174*8e3e3a7aSWarner Losh if (l == ts->shrlen && 175*8e3e3a7aSWarner Losh (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) { 176*8e3e3a7aSWarner Losh /* found! */ 177*8e3e3a7aSWarner Losh if (isdead(g, ts)) /* dead (but not collected yet)? */ 178*8e3e3a7aSWarner Losh changewhite(ts); /* resurrect it */ 179*8e3e3a7aSWarner Losh return ts; 180*8e3e3a7aSWarner Losh } 181*8e3e3a7aSWarner Losh } 182*8e3e3a7aSWarner Losh if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) { 183*8e3e3a7aSWarner Losh luaS_resize(L, g->strt.size * 2); 184*8e3e3a7aSWarner Losh list = &g->strt.hash[lmod(h, g->strt.size)]; /* recompute with new size */ 185*8e3e3a7aSWarner Losh } 186*8e3e3a7aSWarner Losh ts = createstrobj(L, l, LUA_TSHRSTR, h); 187*8e3e3a7aSWarner Losh memcpy(getstr(ts), str, l * sizeof(char)); 188*8e3e3a7aSWarner Losh ts->shrlen = cast_byte(l); 189*8e3e3a7aSWarner Losh ts->u.hnext = *list; 190*8e3e3a7aSWarner Losh *list = ts; 191*8e3e3a7aSWarner Losh g->strt.nuse++; 192*8e3e3a7aSWarner Losh return ts; 193*8e3e3a7aSWarner Losh } 194*8e3e3a7aSWarner Losh 195*8e3e3a7aSWarner Losh 196*8e3e3a7aSWarner Losh /* 197*8e3e3a7aSWarner Losh ** new string (with explicit length) 198*8e3e3a7aSWarner Losh */ 199*8e3e3a7aSWarner Losh TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { 200*8e3e3a7aSWarner Losh if (l <= LUAI_MAXSHORTLEN) /* short string? */ 201*8e3e3a7aSWarner Losh return internshrstr(L, str, l); 202*8e3e3a7aSWarner Losh else { 203*8e3e3a7aSWarner Losh TString *ts; 204*8e3e3a7aSWarner Losh if (l >= (MAX_SIZE - sizeof(TString))/sizeof(char)) 205*8e3e3a7aSWarner Losh luaM_toobig(L); 206*8e3e3a7aSWarner Losh ts = luaS_createlngstrobj(L, l); 207*8e3e3a7aSWarner Losh memcpy(getstr(ts), str, l * sizeof(char)); 208*8e3e3a7aSWarner Losh return ts; 209*8e3e3a7aSWarner Losh } 210*8e3e3a7aSWarner Losh } 211*8e3e3a7aSWarner Losh 212*8e3e3a7aSWarner Losh 213*8e3e3a7aSWarner Losh /* 214*8e3e3a7aSWarner Losh ** Create or reuse a zero-terminated string, first checking in the 215*8e3e3a7aSWarner Losh ** cache (using the string address as a key). The cache can contain 216*8e3e3a7aSWarner Losh ** only zero-terminated strings, so it is safe to use 'strcmp' to 217*8e3e3a7aSWarner Losh ** check hits. 218*8e3e3a7aSWarner Losh */ 219*8e3e3a7aSWarner Losh TString *luaS_new (lua_State *L, const char *str) { 220*8e3e3a7aSWarner Losh unsigned int i = point2uint(str) % STRCACHE_N; /* hash */ 221*8e3e3a7aSWarner Losh int j; 222*8e3e3a7aSWarner Losh TString **p = G(L)->strcache[i]; 223*8e3e3a7aSWarner Losh for (j = 0; j < STRCACHE_M; j++) { 224*8e3e3a7aSWarner Losh if (strcmp(str, getstr(p[j])) == 0) /* hit? */ 225*8e3e3a7aSWarner Losh return p[j]; /* that is it */ 226*8e3e3a7aSWarner Losh } 227*8e3e3a7aSWarner Losh /* normal route */ 228*8e3e3a7aSWarner Losh for (j = STRCACHE_M - 1; j > 0; j--) 229*8e3e3a7aSWarner Losh p[j] = p[j - 1]; /* move out last element */ 230*8e3e3a7aSWarner Losh /* new element is first in the list */ 231*8e3e3a7aSWarner Losh p[0] = luaS_newlstr(L, str, strlen(str)); 232*8e3e3a7aSWarner Losh return p[0]; 233*8e3e3a7aSWarner Losh } 234*8e3e3a7aSWarner Losh 235*8e3e3a7aSWarner Losh 236*8e3e3a7aSWarner Losh Udata *luaS_newudata (lua_State *L, size_t s) { 237*8e3e3a7aSWarner Losh Udata *u; 238*8e3e3a7aSWarner Losh GCObject *o; 239*8e3e3a7aSWarner Losh if (s > MAX_SIZE - sizeof(Udata)) 240*8e3e3a7aSWarner Losh luaM_toobig(L); 241*8e3e3a7aSWarner Losh o = luaC_newobj(L, LUA_TUSERDATA, sizeludata(s)); 242*8e3e3a7aSWarner Losh u = gco2u(o); 243*8e3e3a7aSWarner Losh u->len = s; 244*8e3e3a7aSWarner Losh u->metatable = NULL; 245*8e3e3a7aSWarner Losh setuservalue(L, u, luaO_nilobject); 246*8e3e3a7aSWarner Losh return u; 247*8e3e3a7aSWarner Losh } 248*8e3e3a7aSWarner Losh 249