1*eda14cbcSMatt Macy /* BEGIN CSTYLED */ 2*eda14cbcSMatt Macy /* 3*eda14cbcSMatt Macy ** $Id: lstrlib.c,v 1.178.1.1 2013/04/12 18:48:47 roberto Exp $ 4*eda14cbcSMatt Macy ** Standard library for string operations and pattern-matching 5*eda14cbcSMatt Macy ** See Copyright Notice in lua.h 6*eda14cbcSMatt Macy */ 7*eda14cbcSMatt Macy 8*eda14cbcSMatt Macy 9*eda14cbcSMatt Macy #define lstrlib_c 10*eda14cbcSMatt Macy #define LUA_LIB 11*eda14cbcSMatt Macy 12*eda14cbcSMatt Macy #include <sys/lua/lua.h> 13*eda14cbcSMatt Macy 14*eda14cbcSMatt Macy #include <sys/lua/lauxlib.h> 15*eda14cbcSMatt Macy #include <sys/lua/lualib.h> 16*eda14cbcSMatt Macy 17*eda14cbcSMatt Macy 18*eda14cbcSMatt Macy /* 19*eda14cbcSMatt Macy ** maximum number of captures that a pattern can do during 20*eda14cbcSMatt Macy ** pattern-matching. This limit is arbitrary. 21*eda14cbcSMatt Macy */ 22*eda14cbcSMatt Macy #if !defined(LUA_MAXCAPTURES) 23*eda14cbcSMatt Macy #define LUA_MAXCAPTURES 16 24*eda14cbcSMatt Macy #endif 25*eda14cbcSMatt Macy 26*eda14cbcSMatt Macy 27*eda14cbcSMatt Macy /* macro to `unsign' a character */ 28*eda14cbcSMatt Macy #define uchar(c) ((unsigned char)(c)) 29*eda14cbcSMatt Macy 30*eda14cbcSMatt Macy /* 31*eda14cbcSMatt Macy * The provided version of sprintf returns a char *, but str_format expects 32*eda14cbcSMatt Macy * it to return the number of characters printed. This version has the expected 33*eda14cbcSMatt Macy * behavior. 34*eda14cbcSMatt Macy */ 35*eda14cbcSMatt Macy static size_t str_sprintf(char *buf, const char *fmt, ...) { 36*eda14cbcSMatt Macy va_list args; 37*eda14cbcSMatt Macy size_t len; 38*eda14cbcSMatt Macy 39*eda14cbcSMatt Macy va_start(args, fmt); 40*eda14cbcSMatt Macy len = vsnprintf(buf, INT_MAX, fmt, args); 41*eda14cbcSMatt Macy va_end(args); 42*eda14cbcSMatt Macy 43*eda14cbcSMatt Macy return len; 44*eda14cbcSMatt Macy } 45*eda14cbcSMatt Macy 46*eda14cbcSMatt Macy 47*eda14cbcSMatt Macy static int str_len (lua_State *L) { 48*eda14cbcSMatt Macy size_t l; 49*eda14cbcSMatt Macy luaL_checklstring(L, 1, &l); 50*eda14cbcSMatt Macy lua_pushinteger(L, (lua_Integer)l); 51*eda14cbcSMatt Macy return 1; 52*eda14cbcSMatt Macy } 53*eda14cbcSMatt Macy 54*eda14cbcSMatt Macy 55*eda14cbcSMatt Macy /* translate a relative string position: negative means back from end */ 56*eda14cbcSMatt Macy static size_t posrelat (ptrdiff_t pos, size_t len) { 57*eda14cbcSMatt Macy if (pos >= 0) return (size_t)pos; 58*eda14cbcSMatt Macy else if (0u - (size_t)pos > len) return 0; 59*eda14cbcSMatt Macy else return len - ((size_t)-pos) + 1; 60*eda14cbcSMatt Macy } 61*eda14cbcSMatt Macy 62*eda14cbcSMatt Macy 63*eda14cbcSMatt Macy static int str_sub (lua_State *L) { 64*eda14cbcSMatt Macy size_t l; 65*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &l); 66*eda14cbcSMatt Macy size_t start = posrelat(luaL_checkinteger(L, 2), l); 67*eda14cbcSMatt Macy size_t end = posrelat(luaL_optinteger(L, 3, -1), l); 68*eda14cbcSMatt Macy if (start < 1) start = 1; 69*eda14cbcSMatt Macy if (end > l) end = l; 70*eda14cbcSMatt Macy if (start <= end) 71*eda14cbcSMatt Macy lua_pushlstring(L, s + start - 1, end - start + 1); 72*eda14cbcSMatt Macy else lua_pushliteral(L, ""); 73*eda14cbcSMatt Macy return 1; 74*eda14cbcSMatt Macy } 75*eda14cbcSMatt Macy 76*eda14cbcSMatt Macy 77*eda14cbcSMatt Macy static int str_reverse (lua_State *L) { 78*eda14cbcSMatt Macy size_t l, i; 79*eda14cbcSMatt Macy luaL_Buffer b; 80*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &l); 81*eda14cbcSMatt Macy char *p = luaL_buffinitsize(L, &b, l); 82*eda14cbcSMatt Macy for (i = 0; i < l; i++) 83*eda14cbcSMatt Macy p[i] = s[l - i - 1]; 84*eda14cbcSMatt Macy luaL_pushresultsize(&b, l); 85*eda14cbcSMatt Macy return 1; 86*eda14cbcSMatt Macy } 87*eda14cbcSMatt Macy 88*eda14cbcSMatt Macy 89*eda14cbcSMatt Macy static int str_lower (lua_State *L) { 90*eda14cbcSMatt Macy size_t l; 91*eda14cbcSMatt Macy size_t i; 92*eda14cbcSMatt Macy luaL_Buffer b; 93*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &l); 94*eda14cbcSMatt Macy char *p = luaL_buffinitsize(L, &b, l); 95*eda14cbcSMatt Macy for (i=0; i<l; i++) 96*eda14cbcSMatt Macy p[i] = tolower(uchar(s[i])); 97*eda14cbcSMatt Macy luaL_pushresultsize(&b, l); 98*eda14cbcSMatt Macy return 1; 99*eda14cbcSMatt Macy } 100*eda14cbcSMatt Macy 101*eda14cbcSMatt Macy 102*eda14cbcSMatt Macy static int str_upper (lua_State *L) { 103*eda14cbcSMatt Macy size_t l; 104*eda14cbcSMatt Macy size_t i; 105*eda14cbcSMatt Macy luaL_Buffer b; 106*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &l); 107*eda14cbcSMatt Macy char *p = luaL_buffinitsize(L, &b, l); 108*eda14cbcSMatt Macy for (i=0; i<l; i++) 109*eda14cbcSMatt Macy p[i] = toupper(uchar(s[i])); 110*eda14cbcSMatt Macy luaL_pushresultsize(&b, l); 111*eda14cbcSMatt Macy return 1; 112*eda14cbcSMatt Macy } 113*eda14cbcSMatt Macy 114*eda14cbcSMatt Macy 115*eda14cbcSMatt Macy /* reasonable limit to avoid arithmetic overflow */ 116*eda14cbcSMatt Macy #define MAXSIZE ((~(size_t)0) >> 1) 117*eda14cbcSMatt Macy 118*eda14cbcSMatt Macy static int str_rep (lua_State *L) { 119*eda14cbcSMatt Macy size_t l, lsep; 120*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &l); 121*eda14cbcSMatt Macy int n = luaL_checkint(L, 2); 122*eda14cbcSMatt Macy const char *sep = luaL_optlstring(L, 3, "", &lsep); 123*eda14cbcSMatt Macy if (n <= 0) lua_pushliteral(L, ""); 124*eda14cbcSMatt Macy else if (l + lsep < l || l + lsep >= MAXSIZE / n) /* may overflow? */ 125*eda14cbcSMatt Macy return luaL_error(L, "resulting string too large"); 126*eda14cbcSMatt Macy else { 127*eda14cbcSMatt Macy size_t totallen = n * l + (n - 1) * lsep; 128*eda14cbcSMatt Macy luaL_Buffer b; 129*eda14cbcSMatt Macy char *p = luaL_buffinitsize(L, &b, totallen); 130*eda14cbcSMatt Macy while (n-- > 1) { /* first n-1 copies (followed by separator) */ 131*eda14cbcSMatt Macy memcpy(p, s, l * sizeof(char)); p += l; 132*eda14cbcSMatt Macy if (lsep > 0) { /* avoid empty 'memcpy' (may be expensive) */ 133*eda14cbcSMatt Macy memcpy(p, sep, lsep * sizeof(char)); p += lsep; 134*eda14cbcSMatt Macy } 135*eda14cbcSMatt Macy } 136*eda14cbcSMatt Macy memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 137*eda14cbcSMatt Macy luaL_pushresultsize(&b, totallen); 138*eda14cbcSMatt Macy } 139*eda14cbcSMatt Macy return 1; 140*eda14cbcSMatt Macy } 141*eda14cbcSMatt Macy 142*eda14cbcSMatt Macy 143*eda14cbcSMatt Macy static int str_byte (lua_State *L) { 144*eda14cbcSMatt Macy size_t l; 145*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &l); 146*eda14cbcSMatt Macy size_t posi = posrelat(luaL_optinteger(L, 2, 1), l); 147*eda14cbcSMatt Macy size_t pose = posrelat(luaL_optinteger(L, 3, posi), l); 148*eda14cbcSMatt Macy int n, i; 149*eda14cbcSMatt Macy if (posi < 1) posi = 1; 150*eda14cbcSMatt Macy if (pose > l) pose = l; 151*eda14cbcSMatt Macy if (posi > pose) return 0; /* empty interval; return no values */ 152*eda14cbcSMatt Macy n = (int)(pose - posi + 1); 153*eda14cbcSMatt Macy if (posi + n <= pose) /* (size_t -> int) overflow? */ 154*eda14cbcSMatt Macy return luaL_error(L, "string slice too long"); 155*eda14cbcSMatt Macy luaL_checkstack(L, n, "string slice too long"); 156*eda14cbcSMatt Macy for (i=0; i<n; i++) 157*eda14cbcSMatt Macy lua_pushinteger(L, uchar(s[posi+i-1])); 158*eda14cbcSMatt Macy return n; 159*eda14cbcSMatt Macy } 160*eda14cbcSMatt Macy 161*eda14cbcSMatt Macy 162*eda14cbcSMatt Macy static int str_char (lua_State *L) { 163*eda14cbcSMatt Macy int n = lua_gettop(L); /* number of arguments */ 164*eda14cbcSMatt Macy int i; 165*eda14cbcSMatt Macy luaL_Buffer b; 166*eda14cbcSMatt Macy char *p = luaL_buffinitsize(L, &b, n); 167*eda14cbcSMatt Macy for (i=1; i<=n; i++) { 168*eda14cbcSMatt Macy int c = luaL_checkint(L, i); 169*eda14cbcSMatt Macy luaL_argcheck(L, uchar(c) == c, i, "value out of range"); 170*eda14cbcSMatt Macy p[i - 1] = uchar(c); 171*eda14cbcSMatt Macy } 172*eda14cbcSMatt Macy luaL_pushresultsize(&b, n); 173*eda14cbcSMatt Macy return 1; 174*eda14cbcSMatt Macy } 175*eda14cbcSMatt Macy 176*eda14cbcSMatt Macy 177*eda14cbcSMatt Macy #if defined(LUA_USE_DUMP) 178*eda14cbcSMatt Macy static int writer (lua_State *L, const void* b, size_t size, void* B) { 179*eda14cbcSMatt Macy (void)L; 180*eda14cbcSMatt Macy luaL_addlstring((luaL_Buffer*) B, (const char *)b, size); 181*eda14cbcSMatt Macy return 0; 182*eda14cbcSMatt Macy } 183*eda14cbcSMatt Macy 184*eda14cbcSMatt Macy 185*eda14cbcSMatt Macy static int str_dump (lua_State *L) { 186*eda14cbcSMatt Macy luaL_Buffer b; 187*eda14cbcSMatt Macy luaL_checktype(L, 1, LUA_TFUNCTION); 188*eda14cbcSMatt Macy lua_settop(L, 1); 189*eda14cbcSMatt Macy luaL_buffinit(L,&b); 190*eda14cbcSMatt Macy if (lua_dump(L, writer, &b) != 0) 191*eda14cbcSMatt Macy return luaL_error(L, "unable to dump given function"); 192*eda14cbcSMatt Macy luaL_pushresult(&b); 193*eda14cbcSMatt Macy return 1; 194*eda14cbcSMatt Macy } 195*eda14cbcSMatt Macy #endif 196*eda14cbcSMatt Macy 197*eda14cbcSMatt Macy 198*eda14cbcSMatt Macy /* 199*eda14cbcSMatt Macy ** {====================================================== 200*eda14cbcSMatt Macy ** PATTERN MATCHING 201*eda14cbcSMatt Macy ** ======================================================= 202*eda14cbcSMatt Macy */ 203*eda14cbcSMatt Macy 204*eda14cbcSMatt Macy 205*eda14cbcSMatt Macy #define CAP_UNFINISHED (-1) 206*eda14cbcSMatt Macy #define CAP_POSITION (-2) 207*eda14cbcSMatt Macy 208*eda14cbcSMatt Macy 209*eda14cbcSMatt Macy typedef struct MatchState { 210*eda14cbcSMatt Macy int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 211*eda14cbcSMatt Macy const char *src_init; /* init of source string */ 212*eda14cbcSMatt Macy const char *src_end; /* end ('\0') of source string */ 213*eda14cbcSMatt Macy const char *p_end; /* end ('\0') of pattern */ 214*eda14cbcSMatt Macy lua_State *L; 215*eda14cbcSMatt Macy int level; /* total number of captures (finished or unfinished) */ 216*eda14cbcSMatt Macy struct { 217*eda14cbcSMatt Macy const char *init; 218*eda14cbcSMatt Macy ptrdiff_t len; 219*eda14cbcSMatt Macy } capture[LUA_MAXCAPTURES]; 220*eda14cbcSMatt Macy } MatchState; 221*eda14cbcSMatt Macy 222*eda14cbcSMatt Macy 223*eda14cbcSMatt Macy /* recursive function */ 224*eda14cbcSMatt Macy static const char *match (MatchState *ms, const char *s, const char *p); 225*eda14cbcSMatt Macy 226*eda14cbcSMatt Macy 227*eda14cbcSMatt Macy /* maximum recursion depth for 'match' */ 228*eda14cbcSMatt Macy #if !defined(MAXCCALLS) 229*eda14cbcSMatt Macy #define MAXCCALLS 200 230*eda14cbcSMatt Macy #endif 231*eda14cbcSMatt Macy 232*eda14cbcSMatt Macy 233*eda14cbcSMatt Macy #define L_ESC '%' 234*eda14cbcSMatt Macy #define SPECIALS "^$*+?.([%-" 235*eda14cbcSMatt Macy 236*eda14cbcSMatt Macy 237*eda14cbcSMatt Macy static int check_capture (MatchState *ms, int l) { 238*eda14cbcSMatt Macy l -= '1'; 239*eda14cbcSMatt Macy if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) 240*eda14cbcSMatt Macy return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 241*eda14cbcSMatt Macy return l; 242*eda14cbcSMatt Macy } 243*eda14cbcSMatt Macy 244*eda14cbcSMatt Macy 245*eda14cbcSMatt Macy static int capture_to_close (MatchState *ms) { 246*eda14cbcSMatt Macy int level = ms->level; 247*eda14cbcSMatt Macy for (level--; level>=0; level--) 248*eda14cbcSMatt Macy if (ms->capture[level].len == CAP_UNFINISHED) return level; 249*eda14cbcSMatt Macy return luaL_error(ms->L, "invalid pattern capture"); 250*eda14cbcSMatt Macy } 251*eda14cbcSMatt Macy 252*eda14cbcSMatt Macy 253*eda14cbcSMatt Macy static const char *classend (MatchState *ms, const char *p) { 254*eda14cbcSMatt Macy switch (*p++) { 255*eda14cbcSMatt Macy case L_ESC: { 256*eda14cbcSMatt Macy if (p == ms->p_end) 257*eda14cbcSMatt Macy luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); 258*eda14cbcSMatt Macy return p+1; 259*eda14cbcSMatt Macy } 260*eda14cbcSMatt Macy case '[': { 261*eda14cbcSMatt Macy if (*p == '^') p++; 262*eda14cbcSMatt Macy do { /* look for a `]' */ 263*eda14cbcSMatt Macy if (p == ms->p_end) 264*eda14cbcSMatt Macy luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); 265*eda14cbcSMatt Macy if (*(p++) == L_ESC && p < ms->p_end) 266*eda14cbcSMatt Macy p++; /* skip escapes (e.g. `%]') */ 267*eda14cbcSMatt Macy } while (*p != ']'); 268*eda14cbcSMatt Macy return p+1; 269*eda14cbcSMatt Macy } 270*eda14cbcSMatt Macy default: { 271*eda14cbcSMatt Macy return p; 272*eda14cbcSMatt Macy } 273*eda14cbcSMatt Macy } 274*eda14cbcSMatt Macy } 275*eda14cbcSMatt Macy 276*eda14cbcSMatt Macy 277*eda14cbcSMatt Macy static int match_class (int c, int cl) { 278*eda14cbcSMatt Macy int res; 279*eda14cbcSMatt Macy switch (tolower(cl)) { 280*eda14cbcSMatt Macy case 'a' : res = isalpha(c); break; 281*eda14cbcSMatt Macy case 'c' : res = iscntrl(c); break; 282*eda14cbcSMatt Macy case 'd' : res = isdigit(c); break; 283*eda14cbcSMatt Macy case 'g' : res = isgraph(c); break; 284*eda14cbcSMatt Macy case 'l' : res = islower(c); break; 285*eda14cbcSMatt Macy case 'p' : res = ispunct(c); break; 286*eda14cbcSMatt Macy case 's' : res = isspace(c); break; 287*eda14cbcSMatt Macy case 'u' : res = isupper(c); break; 288*eda14cbcSMatt Macy case 'w' : res = isalnum(c); break; 289*eda14cbcSMatt Macy case 'x' : res = isxdigit(c); break; 290*eda14cbcSMatt Macy case 'z' : res = (c == 0); break; /* deprecated option */ 291*eda14cbcSMatt Macy default: return (cl == c); 292*eda14cbcSMatt Macy } 293*eda14cbcSMatt Macy return (islower(cl) ? res : !res); 294*eda14cbcSMatt Macy } 295*eda14cbcSMatt Macy 296*eda14cbcSMatt Macy 297*eda14cbcSMatt Macy static int matchbracketclass (int c, const char *p, const char *ec) { 298*eda14cbcSMatt Macy int sig = 1; 299*eda14cbcSMatt Macy if (*(p+1) == '^') { 300*eda14cbcSMatt Macy sig = 0; 301*eda14cbcSMatt Macy p++; /* skip the `^' */ 302*eda14cbcSMatt Macy } 303*eda14cbcSMatt Macy while (++p < ec) { 304*eda14cbcSMatt Macy if (*p == L_ESC) { 305*eda14cbcSMatt Macy p++; 306*eda14cbcSMatt Macy if (match_class(c, uchar(*p))) 307*eda14cbcSMatt Macy return sig; 308*eda14cbcSMatt Macy } 309*eda14cbcSMatt Macy else if ((*(p+1) == '-') && (p+2 < ec)) { 310*eda14cbcSMatt Macy p+=2; 311*eda14cbcSMatt Macy if (uchar(*(p-2)) <= c && c <= uchar(*p)) 312*eda14cbcSMatt Macy return sig; 313*eda14cbcSMatt Macy } 314*eda14cbcSMatt Macy else if (uchar(*p) == c) return sig; 315*eda14cbcSMatt Macy } 316*eda14cbcSMatt Macy return !sig; 317*eda14cbcSMatt Macy } 318*eda14cbcSMatt Macy 319*eda14cbcSMatt Macy 320*eda14cbcSMatt Macy static int singlematch (MatchState *ms, const char *s, const char *p, 321*eda14cbcSMatt Macy const char *ep) { 322*eda14cbcSMatt Macy if (s >= ms->src_end) 323*eda14cbcSMatt Macy return 0; 324*eda14cbcSMatt Macy else { 325*eda14cbcSMatt Macy int c = uchar(*s); 326*eda14cbcSMatt Macy switch (*p) { 327*eda14cbcSMatt Macy case '.': return 1; /* matches any char */ 328*eda14cbcSMatt Macy case L_ESC: return match_class(c, uchar(*(p+1))); 329*eda14cbcSMatt Macy case '[': return matchbracketclass(c, p, ep-1); 330*eda14cbcSMatt Macy default: return (uchar(*p) == c); 331*eda14cbcSMatt Macy } 332*eda14cbcSMatt Macy } 333*eda14cbcSMatt Macy } 334*eda14cbcSMatt Macy 335*eda14cbcSMatt Macy 336*eda14cbcSMatt Macy static const char *matchbalance (MatchState *ms, const char *s, 337*eda14cbcSMatt Macy const char *p) { 338*eda14cbcSMatt Macy if (p >= ms->p_end - 1) 339*eda14cbcSMatt Macy luaL_error(ms->L, "malformed pattern " 340*eda14cbcSMatt Macy "(missing arguments to " LUA_QL("%%b") ")"); 341*eda14cbcSMatt Macy if (*s != *p) return NULL; 342*eda14cbcSMatt Macy else { 343*eda14cbcSMatt Macy int b = *p; 344*eda14cbcSMatt Macy int e = *(p+1); 345*eda14cbcSMatt Macy int cont = 1; 346*eda14cbcSMatt Macy while (++s < ms->src_end) { 347*eda14cbcSMatt Macy if (*s == e) { 348*eda14cbcSMatt Macy if (--cont == 0) return s+1; 349*eda14cbcSMatt Macy } 350*eda14cbcSMatt Macy else if (*s == b) cont++; 351*eda14cbcSMatt Macy } 352*eda14cbcSMatt Macy } 353*eda14cbcSMatt Macy return NULL; /* string ends out of balance */ 354*eda14cbcSMatt Macy } 355*eda14cbcSMatt Macy 356*eda14cbcSMatt Macy 357*eda14cbcSMatt Macy static const char *max_expand (MatchState *ms, const char *s, 358*eda14cbcSMatt Macy const char *p, const char *ep) { 359*eda14cbcSMatt Macy ptrdiff_t i = 0; /* counts maximum expand for item */ 360*eda14cbcSMatt Macy while (singlematch(ms, s + i, p, ep)) 361*eda14cbcSMatt Macy i++; 362*eda14cbcSMatt Macy /* keeps trying to match with the maximum repetitions */ 363*eda14cbcSMatt Macy while (i>=0) { 364*eda14cbcSMatt Macy const char *res = match(ms, (s+i), ep+1); 365*eda14cbcSMatt Macy if (res) return res; 366*eda14cbcSMatt Macy i--; /* else didn't match; reduce 1 repetition to try again */ 367*eda14cbcSMatt Macy } 368*eda14cbcSMatt Macy return NULL; 369*eda14cbcSMatt Macy } 370*eda14cbcSMatt Macy 371*eda14cbcSMatt Macy 372*eda14cbcSMatt Macy static const char *min_expand (MatchState *ms, const char *s, 373*eda14cbcSMatt Macy const char *p, const char *ep) { 374*eda14cbcSMatt Macy for (;;) { 375*eda14cbcSMatt Macy const char *res = match(ms, s, ep+1); 376*eda14cbcSMatt Macy if (res != NULL) 377*eda14cbcSMatt Macy return res; 378*eda14cbcSMatt Macy else if (singlematch(ms, s, p, ep)) 379*eda14cbcSMatt Macy s++; /* try with one more repetition */ 380*eda14cbcSMatt Macy else return NULL; 381*eda14cbcSMatt Macy } 382*eda14cbcSMatt Macy } 383*eda14cbcSMatt Macy 384*eda14cbcSMatt Macy 385*eda14cbcSMatt Macy static const char *start_capture (MatchState *ms, const char *s, 386*eda14cbcSMatt Macy const char *p, int what) { 387*eda14cbcSMatt Macy const char *res; 388*eda14cbcSMatt Macy int level = ms->level; 389*eda14cbcSMatt Macy if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 390*eda14cbcSMatt Macy ms->capture[level].init = s; 391*eda14cbcSMatt Macy ms->capture[level].len = what; 392*eda14cbcSMatt Macy ms->level = level+1; 393*eda14cbcSMatt Macy if ((res=match(ms, s, p)) == NULL) /* match failed? */ 394*eda14cbcSMatt Macy ms->level--; /* undo capture */ 395*eda14cbcSMatt Macy return res; 396*eda14cbcSMatt Macy } 397*eda14cbcSMatt Macy 398*eda14cbcSMatt Macy 399*eda14cbcSMatt Macy static const char *end_capture (MatchState *ms, const char *s, 400*eda14cbcSMatt Macy const char *p) { 401*eda14cbcSMatt Macy int l = capture_to_close(ms); 402*eda14cbcSMatt Macy const char *res; 403*eda14cbcSMatt Macy ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 404*eda14cbcSMatt Macy if ((res = match(ms, s, p)) == NULL) /* match failed? */ 405*eda14cbcSMatt Macy ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 406*eda14cbcSMatt Macy return res; 407*eda14cbcSMatt Macy } 408*eda14cbcSMatt Macy 409*eda14cbcSMatt Macy 410*eda14cbcSMatt Macy static const char *match_capture (MatchState *ms, const char *s, int l) { 411*eda14cbcSMatt Macy size_t len; 412*eda14cbcSMatt Macy l = check_capture(ms, l); 413*eda14cbcSMatt Macy len = ms->capture[l].len; 414*eda14cbcSMatt Macy if ((size_t)(ms->src_end-s) >= len && 415*eda14cbcSMatt Macy memcmp(ms->capture[l].init, s, len) == 0) 416*eda14cbcSMatt Macy return s+len; 417*eda14cbcSMatt Macy else return NULL; 418*eda14cbcSMatt Macy } 419*eda14cbcSMatt Macy 420*eda14cbcSMatt Macy 421*eda14cbcSMatt Macy static const char *match (MatchState *ms, const char *s, const char *p) { 422*eda14cbcSMatt Macy if (ms->matchdepth-- == 0) 423*eda14cbcSMatt Macy luaL_error(ms->L, "pattern too complex"); 424*eda14cbcSMatt Macy init: /* using goto's to optimize tail recursion */ 425*eda14cbcSMatt Macy if (p != ms->p_end) { /* end of pattern? */ 426*eda14cbcSMatt Macy switch (*p) { 427*eda14cbcSMatt Macy case '(': { /* start capture */ 428*eda14cbcSMatt Macy if (*(p + 1) == ')') /* position capture? */ 429*eda14cbcSMatt Macy s = start_capture(ms, s, p + 2, CAP_POSITION); 430*eda14cbcSMatt Macy else 431*eda14cbcSMatt Macy s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 432*eda14cbcSMatt Macy break; 433*eda14cbcSMatt Macy } 434*eda14cbcSMatt Macy case ')': { /* end capture */ 435*eda14cbcSMatt Macy s = end_capture(ms, s, p + 1); 436*eda14cbcSMatt Macy break; 437*eda14cbcSMatt Macy } 438*eda14cbcSMatt Macy case '$': { 439*eda14cbcSMatt Macy if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ 440*eda14cbcSMatt Macy goto dflt; /* no; go to default */ 441*eda14cbcSMatt Macy s = (s == ms->src_end) ? s : NULL; /* check end of string */ 442*eda14cbcSMatt Macy break; 443*eda14cbcSMatt Macy } 444*eda14cbcSMatt Macy case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 445*eda14cbcSMatt Macy switch (*(p + 1)) { 446*eda14cbcSMatt Macy case 'b': { /* balanced string? */ 447*eda14cbcSMatt Macy s = matchbalance(ms, s, p + 2); 448*eda14cbcSMatt Macy if (s != NULL) { 449*eda14cbcSMatt Macy p += 4; goto init; /* return match(ms, s, p + 4); */ 450*eda14cbcSMatt Macy } /* else fail (s == NULL) */ 451*eda14cbcSMatt Macy break; 452*eda14cbcSMatt Macy } 453*eda14cbcSMatt Macy case 'f': { /* frontier? */ 454*eda14cbcSMatt Macy const char *ep; char previous; 455*eda14cbcSMatt Macy p += 2; 456*eda14cbcSMatt Macy if (*p != '[') 457*eda14cbcSMatt Macy luaL_error(ms->L, "missing " LUA_QL("[") " after " 458*eda14cbcSMatt Macy LUA_QL("%%f") " in pattern"); 459*eda14cbcSMatt Macy ep = classend(ms, p); /* points to what is next */ 460*eda14cbcSMatt Macy previous = (s == ms->src_init) ? '\0' : *(s - 1); 461*eda14cbcSMatt Macy if (!matchbracketclass(uchar(previous), p, ep - 1) && 462*eda14cbcSMatt Macy matchbracketclass(uchar(*s), p, ep - 1)) { 463*eda14cbcSMatt Macy p = ep; goto init; /* return match(ms, s, ep); */ 464*eda14cbcSMatt Macy } 465*eda14cbcSMatt Macy s = NULL; /* match failed */ 466*eda14cbcSMatt Macy break; 467*eda14cbcSMatt Macy } 468*eda14cbcSMatt Macy case '0': case '1': case '2': case '3': 469*eda14cbcSMatt Macy case '4': case '5': case '6': case '7': 470*eda14cbcSMatt Macy case '8': case '9': { /* capture results (%0-%9)? */ 471*eda14cbcSMatt Macy s = match_capture(ms, s, uchar(*(p + 1))); 472*eda14cbcSMatt Macy if (s != NULL) { 473*eda14cbcSMatt Macy p += 2; goto init; /* return match(ms, s, p + 2) */ 474*eda14cbcSMatt Macy } 475*eda14cbcSMatt Macy break; 476*eda14cbcSMatt Macy } 477*eda14cbcSMatt Macy default: goto dflt; 478*eda14cbcSMatt Macy } 479*eda14cbcSMatt Macy break; 480*eda14cbcSMatt Macy } 481*eda14cbcSMatt Macy default: dflt: { /* pattern class plus optional suffix */ 482*eda14cbcSMatt Macy const char *ep = classend(ms, p); /* points to optional suffix */ 483*eda14cbcSMatt Macy /* does not match at least once? */ 484*eda14cbcSMatt Macy if (!singlematch(ms, s, p, ep)) { 485*eda14cbcSMatt Macy if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 486*eda14cbcSMatt Macy p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 487*eda14cbcSMatt Macy } 488*eda14cbcSMatt Macy else /* '+' or no suffix */ 489*eda14cbcSMatt Macy s = NULL; /* fail */ 490*eda14cbcSMatt Macy } 491*eda14cbcSMatt Macy else { /* matched once */ 492*eda14cbcSMatt Macy switch (*ep) { /* handle optional suffix */ 493*eda14cbcSMatt Macy case '?': { /* optional */ 494*eda14cbcSMatt Macy const char *res; 495*eda14cbcSMatt Macy if ((res = match(ms, s + 1, ep + 1)) != NULL) 496*eda14cbcSMatt Macy s = res; 497*eda14cbcSMatt Macy else { 498*eda14cbcSMatt Macy p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 499*eda14cbcSMatt Macy } 500*eda14cbcSMatt Macy break; 501*eda14cbcSMatt Macy } 502*eda14cbcSMatt Macy case '+': /* 1 or more repetitions */ 503*eda14cbcSMatt Macy s++; /* 1 match already done */ 504*eda14cbcSMatt Macy /* FALLTHROUGH */ 505*eda14cbcSMatt Macy case '*': /* 0 or more repetitions */ 506*eda14cbcSMatt Macy s = max_expand(ms, s, p, ep); 507*eda14cbcSMatt Macy break; 508*eda14cbcSMatt Macy case '-': /* 0 or more repetitions (minimum) */ 509*eda14cbcSMatt Macy s = min_expand(ms, s, p, ep); 510*eda14cbcSMatt Macy break; 511*eda14cbcSMatt Macy default: /* no suffix */ 512*eda14cbcSMatt Macy s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 513*eda14cbcSMatt Macy } 514*eda14cbcSMatt Macy } 515*eda14cbcSMatt Macy break; 516*eda14cbcSMatt Macy } 517*eda14cbcSMatt Macy } 518*eda14cbcSMatt Macy } 519*eda14cbcSMatt Macy ms->matchdepth++; 520*eda14cbcSMatt Macy return s; 521*eda14cbcSMatt Macy } 522*eda14cbcSMatt Macy 523*eda14cbcSMatt Macy 524*eda14cbcSMatt Macy 525*eda14cbcSMatt Macy static const char *lmemfind (const char *s1, size_t l1, 526*eda14cbcSMatt Macy const char *s2, size_t l2) { 527*eda14cbcSMatt Macy if (l2 == 0) return s1; /* empty strings are everywhere */ 528*eda14cbcSMatt Macy else if (l2 > l1) return NULL; /* avoids a negative `l1' */ 529*eda14cbcSMatt Macy else { 530*eda14cbcSMatt Macy const char *init; /* to search for a `*s2' inside `s1' */ 531*eda14cbcSMatt Macy l2--; /* 1st char will be checked by `memchr' */ 532*eda14cbcSMatt Macy l1 = l1-l2; /* `s2' cannot be found after that */ 533*eda14cbcSMatt Macy while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 534*eda14cbcSMatt Macy init++; /* 1st char is already checked */ 535*eda14cbcSMatt Macy if (memcmp(init, s2+1, l2) == 0) 536*eda14cbcSMatt Macy return init-1; 537*eda14cbcSMatt Macy else { /* correct `l1' and `s1' to try again */ 538*eda14cbcSMatt Macy l1 -= init-s1; 539*eda14cbcSMatt Macy s1 = init; 540*eda14cbcSMatt Macy } 541*eda14cbcSMatt Macy } 542*eda14cbcSMatt Macy return NULL; /* not found */ 543*eda14cbcSMatt Macy } 544*eda14cbcSMatt Macy } 545*eda14cbcSMatt Macy 546*eda14cbcSMatt Macy 547*eda14cbcSMatt Macy static void push_onecapture (MatchState *ms, int i, const char *s, 548*eda14cbcSMatt Macy const char *e) { 549*eda14cbcSMatt Macy if (i >= ms->level) { 550*eda14cbcSMatt Macy if (i == 0) /* ms->level == 0, too */ 551*eda14cbcSMatt Macy lua_pushlstring(ms->L, s, e - s); /* add whole match */ 552*eda14cbcSMatt Macy else 553*eda14cbcSMatt Macy luaL_error(ms->L, "invalid capture index"); 554*eda14cbcSMatt Macy } 555*eda14cbcSMatt Macy else { 556*eda14cbcSMatt Macy ptrdiff_t l = ms->capture[i].len; 557*eda14cbcSMatt Macy if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); 558*eda14cbcSMatt Macy if (l == CAP_POSITION) 559*eda14cbcSMatt Macy lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); 560*eda14cbcSMatt Macy else 561*eda14cbcSMatt Macy lua_pushlstring(ms->L, ms->capture[i].init, l); 562*eda14cbcSMatt Macy } 563*eda14cbcSMatt Macy } 564*eda14cbcSMatt Macy 565*eda14cbcSMatt Macy 566*eda14cbcSMatt Macy static int push_captures (MatchState *ms, const char *s, const char *e) { 567*eda14cbcSMatt Macy int i; 568*eda14cbcSMatt Macy int nlevels = (ms->level == 0 && s) ? 1 : ms->level; 569*eda14cbcSMatt Macy luaL_checkstack(ms->L, nlevels, "too many captures"); 570*eda14cbcSMatt Macy for (i = 0; i < nlevels; i++) 571*eda14cbcSMatt Macy push_onecapture(ms, i, s, e); 572*eda14cbcSMatt Macy return nlevels; /* number of strings pushed */ 573*eda14cbcSMatt Macy } 574*eda14cbcSMatt Macy 575*eda14cbcSMatt Macy 576*eda14cbcSMatt Macy /* check whether pattern has no special characters */ 577*eda14cbcSMatt Macy static int nospecials (const char *p, size_t l) { 578*eda14cbcSMatt Macy size_t upto = 0; 579*eda14cbcSMatt Macy do { 580*eda14cbcSMatt Macy if (strpbrk(p + upto, SPECIALS)) 581*eda14cbcSMatt Macy return 0; /* pattern has a special character */ 582*eda14cbcSMatt Macy upto += strlen(p + upto) + 1; /* may have more after \0 */ 583*eda14cbcSMatt Macy } while (upto <= l); 584*eda14cbcSMatt Macy return 1; /* no special chars found */ 585*eda14cbcSMatt Macy } 586*eda14cbcSMatt Macy 587*eda14cbcSMatt Macy 588*eda14cbcSMatt Macy static int str_find_aux (lua_State *L, int find) { 589*eda14cbcSMatt Macy size_t ls, lp; 590*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, 1, &ls); 591*eda14cbcSMatt Macy const char *p = luaL_checklstring(L, 2, &lp); 592*eda14cbcSMatt Macy size_t init = posrelat(luaL_optinteger(L, 3, 1), ls); 593*eda14cbcSMatt Macy if (init < 1) init = 1; 594*eda14cbcSMatt Macy else if (init > ls + 1) { /* start after string's end? */ 595*eda14cbcSMatt Macy lua_pushnil(L); /* cannot find anything */ 596*eda14cbcSMatt Macy return 1; 597*eda14cbcSMatt Macy } 598*eda14cbcSMatt Macy /* explicit request or no special characters? */ 599*eda14cbcSMatt Macy if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 600*eda14cbcSMatt Macy /* do a plain search */ 601*eda14cbcSMatt Macy const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp); 602*eda14cbcSMatt Macy if (s2) { 603*eda14cbcSMatt Macy lua_pushinteger(L, s2 - s + 1); 604*eda14cbcSMatt Macy lua_pushinteger(L, s2 - s + lp); 605*eda14cbcSMatt Macy return 2; 606*eda14cbcSMatt Macy } 607*eda14cbcSMatt Macy } 608*eda14cbcSMatt Macy else { 609*eda14cbcSMatt Macy MatchState ms; 610*eda14cbcSMatt Macy const char *s1 = s + init - 1; 611*eda14cbcSMatt Macy int anchor = (*p == '^'); 612*eda14cbcSMatt Macy if (anchor) { 613*eda14cbcSMatt Macy p++; lp--; /* skip anchor character */ 614*eda14cbcSMatt Macy } 615*eda14cbcSMatt Macy ms.L = L; 616*eda14cbcSMatt Macy ms.matchdepth = MAXCCALLS; 617*eda14cbcSMatt Macy ms.src_init = s; 618*eda14cbcSMatt Macy ms.src_end = s + ls; 619*eda14cbcSMatt Macy ms.p_end = p + lp; 620*eda14cbcSMatt Macy do { 621*eda14cbcSMatt Macy const char *res; 622*eda14cbcSMatt Macy ms.level = 0; 623*eda14cbcSMatt Macy lua_assert(ms.matchdepth == MAXCCALLS); 624*eda14cbcSMatt Macy if ((res=match(&ms, s1, p)) != NULL) { 625*eda14cbcSMatt Macy if (find) { 626*eda14cbcSMatt Macy lua_pushinteger(L, s1 - s + 1); /* start */ 627*eda14cbcSMatt Macy lua_pushinteger(L, res - s); /* end */ 628*eda14cbcSMatt Macy return push_captures(&ms, NULL, 0) + 2; 629*eda14cbcSMatt Macy } 630*eda14cbcSMatt Macy else 631*eda14cbcSMatt Macy return push_captures(&ms, s1, res); 632*eda14cbcSMatt Macy } 633*eda14cbcSMatt Macy } while (s1++ < ms.src_end && !anchor); 634*eda14cbcSMatt Macy } 635*eda14cbcSMatt Macy lua_pushnil(L); /* not found */ 636*eda14cbcSMatt Macy return 1; 637*eda14cbcSMatt Macy } 638*eda14cbcSMatt Macy 639*eda14cbcSMatt Macy 640*eda14cbcSMatt Macy static int str_find (lua_State *L) { 641*eda14cbcSMatt Macy return str_find_aux(L, 1); 642*eda14cbcSMatt Macy } 643*eda14cbcSMatt Macy 644*eda14cbcSMatt Macy 645*eda14cbcSMatt Macy static int str_match (lua_State *L) { 646*eda14cbcSMatt Macy return str_find_aux(L, 0); 647*eda14cbcSMatt Macy } 648*eda14cbcSMatt Macy 649*eda14cbcSMatt Macy 650*eda14cbcSMatt Macy static int gmatch_aux (lua_State *L) { 651*eda14cbcSMatt Macy MatchState ms; 652*eda14cbcSMatt Macy size_t ls, lp; 653*eda14cbcSMatt Macy const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); 654*eda14cbcSMatt Macy const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); 655*eda14cbcSMatt Macy const char *src; 656*eda14cbcSMatt Macy ms.L = L; 657*eda14cbcSMatt Macy ms.matchdepth = MAXCCALLS; 658*eda14cbcSMatt Macy ms.src_init = s; 659*eda14cbcSMatt Macy ms.src_end = s+ls; 660*eda14cbcSMatt Macy ms.p_end = p + lp; 661*eda14cbcSMatt Macy for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); 662*eda14cbcSMatt Macy src <= ms.src_end; 663*eda14cbcSMatt Macy src++) { 664*eda14cbcSMatt Macy const char *e; 665*eda14cbcSMatt Macy ms.level = 0; 666*eda14cbcSMatt Macy lua_assert(ms.matchdepth == MAXCCALLS); 667*eda14cbcSMatt Macy if ((e = match(&ms, src, p)) != NULL) { 668*eda14cbcSMatt Macy lua_Integer newstart = e-s; 669*eda14cbcSMatt Macy if (e == src) newstart++; /* empty match? go at least one position */ 670*eda14cbcSMatt Macy lua_pushinteger(L, newstart); 671*eda14cbcSMatt Macy lua_replace(L, lua_upvalueindex(3)); 672*eda14cbcSMatt Macy return push_captures(&ms, src, e); 673*eda14cbcSMatt Macy } 674*eda14cbcSMatt Macy } 675*eda14cbcSMatt Macy return 0; /* not found */ 676*eda14cbcSMatt Macy } 677*eda14cbcSMatt Macy 678*eda14cbcSMatt Macy 679*eda14cbcSMatt Macy static int str_gmatch (lua_State *L) { 680*eda14cbcSMatt Macy luaL_checkstring(L, 1); 681*eda14cbcSMatt Macy luaL_checkstring(L, 2); 682*eda14cbcSMatt Macy lua_settop(L, 2); 683*eda14cbcSMatt Macy lua_pushinteger(L, 0); 684*eda14cbcSMatt Macy lua_pushcclosure(L, gmatch_aux, 3); 685*eda14cbcSMatt Macy return 1; 686*eda14cbcSMatt Macy } 687*eda14cbcSMatt Macy 688*eda14cbcSMatt Macy 689*eda14cbcSMatt Macy static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 690*eda14cbcSMatt Macy const char *e) { 691*eda14cbcSMatt Macy size_t l, i; 692*eda14cbcSMatt Macy const char *news = lua_tolstring(ms->L, 3, &l); 693*eda14cbcSMatt Macy for (i = 0; i < l; i++) { 694*eda14cbcSMatt Macy if (news[i] != L_ESC) 695*eda14cbcSMatt Macy luaL_addchar(b, news[i]); 696*eda14cbcSMatt Macy else { 697*eda14cbcSMatt Macy i++; /* skip ESC */ 698*eda14cbcSMatt Macy if (!isdigit(uchar(news[i]))) { 699*eda14cbcSMatt Macy if (news[i] != L_ESC) 700*eda14cbcSMatt Macy luaL_error(ms->L, "invalid use of " LUA_QL("%c") 701*eda14cbcSMatt Macy " in replacement string", L_ESC); 702*eda14cbcSMatt Macy luaL_addchar(b, news[i]); 703*eda14cbcSMatt Macy } 704*eda14cbcSMatt Macy else if (news[i] == '0') 705*eda14cbcSMatt Macy luaL_addlstring(b, s, e - s); 706*eda14cbcSMatt Macy else { 707*eda14cbcSMatt Macy push_onecapture(ms, news[i] - '1', s, e); 708*eda14cbcSMatt Macy luaL_addvalue(b); /* add capture to accumulated result */ 709*eda14cbcSMatt Macy } 710*eda14cbcSMatt Macy } 711*eda14cbcSMatt Macy } 712*eda14cbcSMatt Macy } 713*eda14cbcSMatt Macy 714*eda14cbcSMatt Macy 715*eda14cbcSMatt Macy static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, 716*eda14cbcSMatt Macy const char *e, int tr) { 717*eda14cbcSMatt Macy lua_State *L = ms->L; 718*eda14cbcSMatt Macy switch (tr) { 719*eda14cbcSMatt Macy case LUA_TFUNCTION: { 720*eda14cbcSMatt Macy int n; 721*eda14cbcSMatt Macy lua_pushvalue(L, 3); 722*eda14cbcSMatt Macy n = push_captures(ms, s, e); 723*eda14cbcSMatt Macy lua_call(L, n, 1); 724*eda14cbcSMatt Macy break; 725*eda14cbcSMatt Macy } 726*eda14cbcSMatt Macy case LUA_TTABLE: { 727*eda14cbcSMatt Macy push_onecapture(ms, 0, s, e); 728*eda14cbcSMatt Macy lua_gettable(L, 3); 729*eda14cbcSMatt Macy break; 730*eda14cbcSMatt Macy } 731*eda14cbcSMatt Macy default: { /* LUA_TNUMBER or LUA_TSTRING */ 732*eda14cbcSMatt Macy add_s(ms, b, s, e); 733*eda14cbcSMatt Macy return; 734*eda14cbcSMatt Macy } 735*eda14cbcSMatt Macy } 736*eda14cbcSMatt Macy if (!lua_toboolean(L, -1)) { /* nil or false? */ 737*eda14cbcSMatt Macy lua_pop(L, 1); 738*eda14cbcSMatt Macy lua_pushlstring(L, s, e - s); /* keep original text */ 739*eda14cbcSMatt Macy } 740*eda14cbcSMatt Macy else if (!lua_isstring(L, -1)) 741*eda14cbcSMatt Macy luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); 742*eda14cbcSMatt Macy luaL_addvalue(b); /* add result to accumulator */ 743*eda14cbcSMatt Macy } 744*eda14cbcSMatt Macy 745*eda14cbcSMatt Macy 746*eda14cbcSMatt Macy static int str_gsub (lua_State *L) { 747*eda14cbcSMatt Macy size_t srcl, lp; 748*eda14cbcSMatt Macy const char *src = luaL_checklstring(L, 1, &srcl); 749*eda14cbcSMatt Macy const char *p = luaL_checklstring(L, 2, &lp); 750*eda14cbcSMatt Macy int tr = lua_type(L, 3); 751*eda14cbcSMatt Macy size_t max_s = luaL_optinteger(L, 4, srcl+1); 752*eda14cbcSMatt Macy int anchor = (*p == '^'); 753*eda14cbcSMatt Macy size_t n = 0; 754*eda14cbcSMatt Macy MatchState ms; 755*eda14cbcSMatt Macy luaL_Buffer b; 756*eda14cbcSMatt Macy luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 757*eda14cbcSMatt Macy tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 758*eda14cbcSMatt Macy "string/function/table expected"); 759*eda14cbcSMatt Macy luaL_buffinit(L, &b); 760*eda14cbcSMatt Macy if (anchor) { 761*eda14cbcSMatt Macy p++; lp--; /* skip anchor character */ 762*eda14cbcSMatt Macy } 763*eda14cbcSMatt Macy ms.L = L; 764*eda14cbcSMatt Macy ms.matchdepth = MAXCCALLS; 765*eda14cbcSMatt Macy ms.src_init = src; 766*eda14cbcSMatt Macy ms.src_end = src+srcl; 767*eda14cbcSMatt Macy ms.p_end = p + lp; 768*eda14cbcSMatt Macy while (n < max_s) { 769*eda14cbcSMatt Macy const char *e; 770*eda14cbcSMatt Macy ms.level = 0; 771*eda14cbcSMatt Macy lua_assert(ms.matchdepth == MAXCCALLS); 772*eda14cbcSMatt Macy e = match(&ms, src, p); 773*eda14cbcSMatt Macy if (e) { 774*eda14cbcSMatt Macy n++; 775*eda14cbcSMatt Macy add_value(&ms, &b, src, e, tr); 776*eda14cbcSMatt Macy } 777*eda14cbcSMatt Macy if (e && e>src) /* non empty match? */ 778*eda14cbcSMatt Macy src = e; /* skip it */ 779*eda14cbcSMatt Macy else if (src < ms.src_end) 780*eda14cbcSMatt Macy luaL_addchar(&b, *src++); 781*eda14cbcSMatt Macy else break; 782*eda14cbcSMatt Macy if (anchor) break; 783*eda14cbcSMatt Macy } 784*eda14cbcSMatt Macy luaL_addlstring(&b, src, ms.src_end-src); 785*eda14cbcSMatt Macy luaL_pushresult(&b); 786*eda14cbcSMatt Macy lua_pushinteger(L, n); /* number of substitutions */ 787*eda14cbcSMatt Macy return 2; 788*eda14cbcSMatt Macy } 789*eda14cbcSMatt Macy 790*eda14cbcSMatt Macy /* }====================================================== */ 791*eda14cbcSMatt Macy 792*eda14cbcSMatt Macy 793*eda14cbcSMatt Macy 794*eda14cbcSMatt Macy /* 795*eda14cbcSMatt Macy ** {====================================================== 796*eda14cbcSMatt Macy ** STRING FORMAT 797*eda14cbcSMatt Macy ** ======================================================= 798*eda14cbcSMatt Macy */ 799*eda14cbcSMatt Macy 800*eda14cbcSMatt Macy /* 801*eda14cbcSMatt Macy ** LUA_INTFRMLEN is the length modifier for integer conversions in 802*eda14cbcSMatt Macy ** 'string.format'; LUA_INTFRM_T is the integer type corresponding to 803*eda14cbcSMatt Macy ** the previous length 804*eda14cbcSMatt Macy */ 805*eda14cbcSMatt Macy #if !defined(LUA_INTFRMLEN) /* { */ 806*eda14cbcSMatt Macy #if defined(LUA_USE_LONGLONG) 807*eda14cbcSMatt Macy 808*eda14cbcSMatt Macy #define LUA_INTFRMLEN "ll" 809*eda14cbcSMatt Macy #define LUA_INTFRM_T long long 810*eda14cbcSMatt Macy 811*eda14cbcSMatt Macy #else 812*eda14cbcSMatt Macy 813*eda14cbcSMatt Macy #define LUA_INTFRMLEN "l" 814*eda14cbcSMatt Macy #define LUA_INTFRM_T long 815*eda14cbcSMatt Macy 816*eda14cbcSMatt Macy #endif 817*eda14cbcSMatt Macy #endif /* } */ 818*eda14cbcSMatt Macy 819*eda14cbcSMatt Macy 820*eda14cbcSMatt Macy /* 821*eda14cbcSMatt Macy ** LUA_FLTFRMLEN is the length modifier for float conversions in 822*eda14cbcSMatt Macy ** 'string.format'; LUA_FLTFRM_T is the float type corresponding to 823*eda14cbcSMatt Macy ** the previous length 824*eda14cbcSMatt Macy */ 825*eda14cbcSMatt Macy #if !defined(LUA_FLTFRMLEN) 826*eda14cbcSMatt Macy 827*eda14cbcSMatt Macy #define LUA_FLTFRMLEN "" 828*eda14cbcSMatt Macy #define LUA_FLTFRM_T double 829*eda14cbcSMatt Macy 830*eda14cbcSMatt Macy #endif 831*eda14cbcSMatt Macy 832*eda14cbcSMatt Macy 833*eda14cbcSMatt Macy /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 834*eda14cbcSMatt Macy #define MAX_ITEM 512 835*eda14cbcSMatt Macy /* valid flags in a format specification */ 836*eda14cbcSMatt Macy #define FLAGS "-+ #0" 837*eda14cbcSMatt Macy /* 838*eda14cbcSMatt Macy ** maximum size of each format specification (such as '%-099.99d') 839*eda14cbcSMatt Macy ** (+10 accounts for %99.99x plus margin of error) 840*eda14cbcSMatt Macy */ 841*eda14cbcSMatt Macy #define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10) 842*eda14cbcSMatt Macy 843*eda14cbcSMatt Macy 844*eda14cbcSMatt Macy static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { 845*eda14cbcSMatt Macy size_t l; 846*eda14cbcSMatt Macy const char *s = luaL_checklstring(L, arg, &l); 847*eda14cbcSMatt Macy luaL_addchar(b, '"'); 848*eda14cbcSMatt Macy while (l--) { 849*eda14cbcSMatt Macy if (*s == '"' || *s == '\\' || *s == '\n') { 850*eda14cbcSMatt Macy luaL_addchar(b, '\\'); 851*eda14cbcSMatt Macy luaL_addchar(b, *s); 852*eda14cbcSMatt Macy } 853*eda14cbcSMatt Macy else if (*s == '\0' || iscntrl(uchar(*s))) { 854*eda14cbcSMatt Macy char buff[10]; 855*eda14cbcSMatt Macy if (!isdigit(uchar(*(s+1)))) 856*eda14cbcSMatt Macy snprintf(buff, sizeof(buff), "\\%d", (int)uchar(*s)); 857*eda14cbcSMatt Macy else 858*eda14cbcSMatt Macy snprintf(buff, sizeof(buff), "\\%03d", (int)uchar(*s)); 859*eda14cbcSMatt Macy luaL_addstring(b, buff); 860*eda14cbcSMatt Macy } 861*eda14cbcSMatt Macy else 862*eda14cbcSMatt Macy luaL_addchar(b, *s); 863*eda14cbcSMatt Macy s++; 864*eda14cbcSMatt Macy } 865*eda14cbcSMatt Macy luaL_addchar(b, '"'); 866*eda14cbcSMatt Macy } 867*eda14cbcSMatt Macy 868*eda14cbcSMatt Macy static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { 869*eda14cbcSMatt Macy const char *p = strfrmt; 870*eda14cbcSMatt Macy while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */ 871*eda14cbcSMatt Macy if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char)) 872*eda14cbcSMatt Macy luaL_error(L, "invalid format (repeated flags)"); 873*eda14cbcSMatt Macy if (isdigit(uchar(*p))) p++; /* skip width */ 874*eda14cbcSMatt Macy if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 875*eda14cbcSMatt Macy if (*p == '.') { 876*eda14cbcSMatt Macy p++; 877*eda14cbcSMatt Macy if (isdigit(uchar(*p))) p++; /* skip precision */ 878*eda14cbcSMatt Macy if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 879*eda14cbcSMatt Macy } 880*eda14cbcSMatt Macy if (isdigit(uchar(*p))) 881*eda14cbcSMatt Macy luaL_error(L, "invalid format (width or precision too long)"); 882*eda14cbcSMatt Macy *(form++) = '%'; 883*eda14cbcSMatt Macy memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char)); 884*eda14cbcSMatt Macy form += p - strfrmt + 1; 885*eda14cbcSMatt Macy *form = '\0'; 886*eda14cbcSMatt Macy return p; 887*eda14cbcSMatt Macy } 888*eda14cbcSMatt Macy 889*eda14cbcSMatt Macy 890*eda14cbcSMatt Macy /* 891*eda14cbcSMatt Macy ** add length modifier into formats 892*eda14cbcSMatt Macy */ 893*eda14cbcSMatt Macy static void addlenmod (char *form, const char *lenmod, size_t size) { 894*eda14cbcSMatt Macy size_t l = strlen(form); 895*eda14cbcSMatt Macy size_t lm = strlen(lenmod); 896*eda14cbcSMatt Macy char spec = form[l - 1]; 897*eda14cbcSMatt Macy strlcpy(form + l - 1, lenmod, size - (l - 1)); 898*eda14cbcSMatt Macy form[l + lm - 1] = spec; 899*eda14cbcSMatt Macy form[l + lm] = '\0'; 900*eda14cbcSMatt Macy } 901*eda14cbcSMatt Macy 902*eda14cbcSMatt Macy 903*eda14cbcSMatt Macy static int str_format (lua_State *L) { 904*eda14cbcSMatt Macy int top = lua_gettop(L); 905*eda14cbcSMatt Macy int arg = 1; 906*eda14cbcSMatt Macy size_t sfl; 907*eda14cbcSMatt Macy const char *strfrmt = luaL_checklstring(L, arg, &sfl); 908*eda14cbcSMatt Macy const char *strfrmt_end = strfrmt+sfl; 909*eda14cbcSMatt Macy luaL_Buffer b; 910*eda14cbcSMatt Macy luaL_buffinit(L, &b); 911*eda14cbcSMatt Macy while (strfrmt < strfrmt_end) { 912*eda14cbcSMatt Macy if (*strfrmt != L_ESC) 913*eda14cbcSMatt Macy luaL_addchar(&b, *strfrmt++); 914*eda14cbcSMatt Macy else if (*++strfrmt == L_ESC) 915*eda14cbcSMatt Macy luaL_addchar(&b, *strfrmt++); /* %% */ 916*eda14cbcSMatt Macy else { /* format item */ 917*eda14cbcSMatt Macy char form[MAX_FORMAT]; /* to store the format (`%...') */ 918*eda14cbcSMatt Macy char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */ 919*eda14cbcSMatt Macy int nb = 0; /* number of bytes in added item */ 920*eda14cbcSMatt Macy if (++arg > top) 921*eda14cbcSMatt Macy luaL_argerror(L, arg, "no value"); 922*eda14cbcSMatt Macy strfrmt = scanformat(L, strfrmt, form); 923*eda14cbcSMatt Macy switch (*strfrmt++) { 924*eda14cbcSMatt Macy case 'c': { 925*eda14cbcSMatt Macy nb = str_sprintf(buff, form, luaL_checkint(L, arg)); 926*eda14cbcSMatt Macy break; 927*eda14cbcSMatt Macy } 928*eda14cbcSMatt Macy case 'd': case 'i': { 929*eda14cbcSMatt Macy lua_Number n = luaL_checknumber(L, arg); 930*eda14cbcSMatt Macy LUA_INTFRM_T ni = (LUA_INTFRM_T)n; 931*eda14cbcSMatt Macy lua_Number diff = n - (lua_Number)ni; 932*eda14cbcSMatt Macy luaL_argcheck(L, -1 < diff && diff < 1, arg, 933*eda14cbcSMatt Macy "not a number in proper range"); 934*eda14cbcSMatt Macy addlenmod(form, LUA_INTFRMLEN, MAX_FORMAT); 935*eda14cbcSMatt Macy nb = str_sprintf(buff, form, ni); 936*eda14cbcSMatt Macy break; 937*eda14cbcSMatt Macy } 938*eda14cbcSMatt Macy case 'o': case 'u': case 'x': case 'X': { 939*eda14cbcSMatt Macy lua_Number n = luaL_checknumber(L, arg); 940*eda14cbcSMatt Macy unsigned LUA_INTFRM_T ni = (unsigned LUA_INTFRM_T)n; 941*eda14cbcSMatt Macy lua_Number diff = n - (lua_Number)ni; 942*eda14cbcSMatt Macy luaL_argcheck(L, -1 < diff && diff < 1, arg, 943*eda14cbcSMatt Macy "not a non-negative number in proper range"); 944*eda14cbcSMatt Macy addlenmod(form, LUA_INTFRMLEN, MAX_FORMAT); 945*eda14cbcSMatt Macy nb = str_sprintf(buff, form, ni); 946*eda14cbcSMatt Macy break; 947*eda14cbcSMatt Macy } 948*eda14cbcSMatt Macy #if defined(LUA_USE_FLOAT_FORMATS) 949*eda14cbcSMatt Macy case 'e': case 'E': case 'f': 950*eda14cbcSMatt Macy #if defined(LUA_USE_AFORMAT) 951*eda14cbcSMatt Macy case 'a': case 'A': 952*eda14cbcSMatt Macy #endif 953*eda14cbcSMatt Macy case 'g': case 'G': { 954*eda14cbcSMatt Macy addlenmod(form, LUA_FLTFRMLEN, MAX_FORMAT); 955*eda14cbcSMatt Macy nb = str_sprintf(buff, form, (LUA_FLTFRM_T)luaL_checknumber(L, arg)); 956*eda14cbcSMatt Macy break; 957*eda14cbcSMatt Macy } 958*eda14cbcSMatt Macy #endif 959*eda14cbcSMatt Macy case 'q': { 960*eda14cbcSMatt Macy addquoted(L, &b, arg); 961*eda14cbcSMatt Macy break; 962*eda14cbcSMatt Macy } 963*eda14cbcSMatt Macy case 's': { 964*eda14cbcSMatt Macy size_t l; 965*eda14cbcSMatt Macy const char *s = luaL_tolstring(L, arg, &l); 966*eda14cbcSMatt Macy if (!strchr(form, '.') && l >= 100) { 967*eda14cbcSMatt Macy /* no precision and string is too long to be formatted; 968*eda14cbcSMatt Macy keep original string */ 969*eda14cbcSMatt Macy luaL_addvalue(&b); 970*eda14cbcSMatt Macy break; 971*eda14cbcSMatt Macy } 972*eda14cbcSMatt Macy else { 973*eda14cbcSMatt Macy nb = str_sprintf(buff, form, s); 974*eda14cbcSMatt Macy lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ 975*eda14cbcSMatt Macy break; 976*eda14cbcSMatt Macy } 977*eda14cbcSMatt Macy } 978*eda14cbcSMatt Macy default: { /* also treat cases `pnLlh' */ 979*eda14cbcSMatt Macy return luaL_error(L, "invalid option " LUA_QL("%%%c") " to " 980*eda14cbcSMatt Macy LUA_QL("format"), *(strfrmt - 1)); 981*eda14cbcSMatt Macy } 982*eda14cbcSMatt Macy } 983*eda14cbcSMatt Macy luaL_addsize(&b, nb); 984*eda14cbcSMatt Macy } 985*eda14cbcSMatt Macy } 986*eda14cbcSMatt Macy luaL_pushresult(&b); 987*eda14cbcSMatt Macy return 1; 988*eda14cbcSMatt Macy } 989*eda14cbcSMatt Macy 990*eda14cbcSMatt Macy /* }====================================================== */ 991*eda14cbcSMatt Macy 992*eda14cbcSMatt Macy 993*eda14cbcSMatt Macy static const luaL_Reg strlib[] = { 994*eda14cbcSMatt Macy {"byte", str_byte}, 995*eda14cbcSMatt Macy {"char", str_char}, 996*eda14cbcSMatt Macy #if defined(LUA_USE_DUMP) 997*eda14cbcSMatt Macy {"dump", str_dump}, 998*eda14cbcSMatt Macy #endif 999*eda14cbcSMatt Macy {"find", str_find}, 1000*eda14cbcSMatt Macy {"format", str_format}, 1001*eda14cbcSMatt Macy {"gmatch", str_gmatch}, 1002*eda14cbcSMatt Macy {"gsub", str_gsub}, 1003*eda14cbcSMatt Macy {"len", str_len}, 1004*eda14cbcSMatt Macy {"lower", str_lower}, 1005*eda14cbcSMatt Macy {"match", str_match}, 1006*eda14cbcSMatt Macy {"rep", str_rep}, 1007*eda14cbcSMatt Macy {"reverse", str_reverse}, 1008*eda14cbcSMatt Macy {"sub", str_sub}, 1009*eda14cbcSMatt Macy {"upper", str_upper}, 1010*eda14cbcSMatt Macy {NULL, NULL} 1011*eda14cbcSMatt Macy }; 1012*eda14cbcSMatt Macy 1013*eda14cbcSMatt Macy 1014*eda14cbcSMatt Macy static void createmetatable (lua_State *L) { 1015*eda14cbcSMatt Macy lua_createtable(L, 0, 1); /* table to be metatable for strings */ 1016*eda14cbcSMatt Macy lua_pushliteral(L, ""); /* dummy string */ 1017*eda14cbcSMatt Macy lua_pushvalue(L, -2); /* copy table */ 1018*eda14cbcSMatt Macy lua_setmetatable(L, -2); /* set table as metatable for strings */ 1019*eda14cbcSMatt Macy lua_pop(L, 1); /* pop dummy string */ 1020*eda14cbcSMatt Macy lua_pushvalue(L, -2); /* get string library */ 1021*eda14cbcSMatt Macy lua_setfield(L, -2, "__index"); /* metatable.__index = string */ 1022*eda14cbcSMatt Macy lua_pop(L, 1); /* pop metatable */ 1023*eda14cbcSMatt Macy } 1024*eda14cbcSMatt Macy 1025*eda14cbcSMatt Macy 1026*eda14cbcSMatt Macy /* 1027*eda14cbcSMatt Macy ** Open string library 1028*eda14cbcSMatt Macy */ 1029*eda14cbcSMatt Macy LUAMOD_API int luaopen_string (lua_State *L) { 1030*eda14cbcSMatt Macy luaL_newlib(L, strlib); 1031*eda14cbcSMatt Macy createmetatable(L); 1032*eda14cbcSMatt Macy return 1; 1033*eda14cbcSMatt Macy } 1034*eda14cbcSMatt Macy 1035*eda14cbcSMatt Macy #if defined(_KERNEL) 1036*eda14cbcSMatt Macy 1037*eda14cbcSMatt Macy EXPORT_SYMBOL(luaopen_string); 1038*eda14cbcSMatt Macy 1039*eda14cbcSMatt Macy #endif 1040*eda14cbcSMatt Macy /* END CSTYLED */ 1041