1 /* 2 ** $Id: lstrlib.c $ 3 ** Standard library for string operations and pattern-matching 4 ** See Copyright Notice in lua.h 5 */ 6 7 #define lstrlib_c 8 #define LUA_LIB 9 10 #include "lprefix.h" 11 12 13 #include <ctype.h> 14 #include <float.h> 15 #include <limits.h> 16 #include <locale.h> 17 #include <math.h> 18 #include <stddef.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "lua.h" 24 25 #include "lauxlib.h" 26 #include "lualib.h" 27 28 29 /* 30 ** maximum number of captures that a pattern can do during 31 ** pattern-matching. This limit is arbitrary, but must fit in 32 ** an unsigned char. 33 */ 34 #if !defined(LUA_MAXCAPTURES) 35 #define LUA_MAXCAPTURES 32 36 #endif 37 38 39 /* macro to 'unsign' a character */ 40 #define uchar(c) ((unsigned char)(c)) 41 42 43 /* 44 ** Some sizes are better limited to fit in 'int', but must also fit in 45 ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) 46 */ 47 #define MAX_SIZET ((size_t)(~(size_t)0)) 48 49 #define MAXSIZE \ 50 (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) 51 52 53 54 55 static int str_len (lua_State *L) { 56 size_t l; 57 luaL_checklstring(L, 1, &l); 58 lua_pushinteger(L, (lua_Integer)l); 59 return 1; 60 } 61 62 63 /* 64 ** translate a relative initial string position 65 ** (negative means back from end): clip result to [1, inf). 66 ** The length of any string in Lua must fit in a lua_Integer, 67 ** so there are no overflows in the casts. 68 ** The inverted comparison avoids a possible overflow 69 ** computing '-pos'. 70 */ 71 static size_t posrelatI (lua_Integer pos, size_t len) { 72 if (pos > 0) 73 return (size_t)pos; 74 else if (pos == 0) 75 return 1; 76 else if (pos < -(lua_Integer)len) /* inverted comparison */ 77 return 1; /* clip to 1 */ 78 else return len + (size_t)pos + 1; 79 } 80 81 82 /* 83 ** Gets an optional ending string position from argument 'arg', 84 ** with default value 'def'. 85 ** Negative means back from end: clip result to [0, len] 86 */ 87 static size_t getendpos (lua_State *L, int arg, lua_Integer def, 88 size_t len) { 89 lua_Integer pos = luaL_optinteger(L, arg, def); 90 if (pos > (lua_Integer)len) 91 return len; 92 else if (pos >= 0) 93 return (size_t)pos; 94 else if (pos < -(lua_Integer)len) 95 return 0; 96 else return len + (size_t)pos + 1; 97 } 98 99 100 static int str_sub (lua_State *L) { 101 size_t l; 102 const char *s = luaL_checklstring(L, 1, &l); 103 size_t start = posrelatI(luaL_checkinteger(L, 2), l); 104 size_t end = getendpos(L, 3, -1, l); 105 if (start <= end) 106 lua_pushlstring(L, s + start - 1, (end - start) + 1); 107 else lua_pushliteral(L, ""); 108 return 1; 109 } 110 111 112 static int str_reverse (lua_State *L) { 113 size_t l, i; 114 luaL_Buffer b; 115 const char *s = luaL_checklstring(L, 1, &l); 116 char *p = luaL_buffinitsize(L, &b, l); 117 for (i = 0; i < l; i++) 118 p[i] = s[l - i - 1]; 119 luaL_pushresultsize(&b, l); 120 return 1; 121 } 122 123 124 static int str_lower (lua_State *L) { 125 size_t l; 126 size_t i; 127 luaL_Buffer b; 128 const char *s = luaL_checklstring(L, 1, &l); 129 char *p = luaL_buffinitsize(L, &b, l); 130 for (i=0; i<l; i++) 131 p[i] = tolower(uchar(s[i])); 132 luaL_pushresultsize(&b, l); 133 return 1; 134 } 135 136 137 static int str_upper (lua_State *L) { 138 size_t l; 139 size_t i; 140 luaL_Buffer b; 141 const char *s = luaL_checklstring(L, 1, &l); 142 char *p = luaL_buffinitsize(L, &b, l); 143 for (i=0; i<l; i++) 144 p[i] = toupper(uchar(s[i])); 145 luaL_pushresultsize(&b, l); 146 return 1; 147 } 148 149 150 static int str_rep (lua_State *L) { 151 size_t l, lsep; 152 const char *s = luaL_checklstring(L, 1, &l); 153 lua_Integer n = luaL_checkinteger(L, 2); 154 const char *sep = luaL_optlstring(L, 3, "", &lsep); 155 if (n <= 0) 156 lua_pushliteral(L, ""); 157 else if (l_unlikely(l + lsep < l || l + lsep > MAXSIZE / n)) 158 return luaL_error(L, "resulting string too large"); 159 else { 160 size_t totallen = (size_t)n * l + (size_t)(n - 1) * lsep; 161 luaL_Buffer b; 162 char *p = luaL_buffinitsize(L, &b, totallen); 163 while (n-- > 1) { /* first n-1 copies (followed by separator) */ 164 memcpy(p, s, l * sizeof(char)); p += l; 165 if (lsep > 0) { /* empty 'memcpy' is not that cheap */ 166 memcpy(p, sep, lsep * sizeof(char)); 167 p += lsep; 168 } 169 } 170 memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 171 luaL_pushresultsize(&b, totallen); 172 } 173 return 1; 174 } 175 176 177 static int str_byte (lua_State *L) { 178 size_t l; 179 const char *s = luaL_checklstring(L, 1, &l); 180 lua_Integer pi = luaL_optinteger(L, 2, 1); 181 size_t posi = posrelatI(pi, l); 182 size_t pose = getendpos(L, 3, pi, l); 183 int n, i; 184 if (posi > pose) return 0; /* empty interval; return no values */ 185 if (l_unlikely(pose - posi >= (size_t)INT_MAX)) /* arithmetic overflow? */ 186 return luaL_error(L, "string slice too long"); 187 n = (int)(pose - posi) + 1; 188 luaL_checkstack(L, n, "string slice too long"); 189 for (i=0; i<n; i++) 190 lua_pushinteger(L, uchar(s[posi+i-1])); 191 return n; 192 } 193 194 195 static int str_char (lua_State *L) { 196 int n = lua_gettop(L); /* number of arguments */ 197 int i; 198 luaL_Buffer b; 199 char *p = luaL_buffinitsize(L, &b, n); 200 for (i=1; i<=n; i++) { 201 lua_Unsigned c = (lua_Unsigned)luaL_checkinteger(L, i); 202 luaL_argcheck(L, c <= (lua_Unsigned)UCHAR_MAX, i, "value out of range"); 203 p[i - 1] = uchar(c); 204 } 205 luaL_pushresultsize(&b, n); 206 return 1; 207 } 208 209 210 /* 211 ** Buffer to store the result of 'string.dump'. It must be initialized 212 ** after the call to 'lua_dump', to ensure that the function is on the 213 ** top of the stack when 'lua_dump' is called. ('luaL_buffinit' might 214 ** push stuff.) 215 */ 216 struct str_Writer { 217 int init; /* true iff buffer has been initialized */ 218 luaL_Buffer B; 219 }; 220 221 222 static int writer (lua_State *L, const void *b, size_t size, void *ud) { 223 struct str_Writer *state = (struct str_Writer *)ud; 224 if (!state->init) { 225 state->init = 1; 226 luaL_buffinit(L, &state->B); 227 } 228 luaL_addlstring(&state->B, (const char *)b, size); 229 return 0; 230 } 231 232 233 static int str_dump (lua_State *L) { 234 struct str_Writer state; 235 int strip = lua_toboolean(L, 2); 236 luaL_checktype(L, 1, LUA_TFUNCTION); 237 lua_settop(L, 1); /* ensure function is on the top of the stack */ 238 state.init = 0; 239 if (l_unlikely(lua_dump(L, writer, &state, strip) != 0)) 240 return luaL_error(L, "unable to dump given function"); 241 luaL_pushresult(&state.B); 242 return 1; 243 } 244 245 246 247 /* 248 ** {====================================================== 249 ** METAMETHODS 250 ** ======================================================= 251 */ 252 253 #if defined(LUA_NOCVTS2N) /* { */ 254 255 /* no coercion from strings to numbers */ 256 257 static const luaL_Reg stringmetamethods[] = { 258 {"__index", NULL}, /* placeholder */ 259 {NULL, NULL} 260 }; 261 262 #else /* }{ */ 263 264 static int tonum (lua_State *L, int arg) { 265 if (lua_type(L, arg) == LUA_TNUMBER) { /* already a number? */ 266 lua_pushvalue(L, arg); 267 return 1; 268 } 269 else { /* check whether it is a numerical string */ 270 size_t len; 271 const char *s = lua_tolstring(L, arg, &len); 272 return (s != NULL && lua_stringtonumber(L, s) == len + 1); 273 } 274 } 275 276 277 static void trymt (lua_State *L, const char *mtname) { 278 lua_settop(L, 2); /* back to the original arguments */ 279 if (l_unlikely(lua_type(L, 2) == LUA_TSTRING || 280 !luaL_getmetafield(L, 2, mtname))) 281 luaL_error(L, "attempt to %s a '%s' with a '%s'", mtname + 2, 282 luaL_typename(L, -2), luaL_typename(L, -1)); 283 lua_insert(L, -3); /* put metamethod before arguments */ 284 lua_call(L, 2, 1); /* call metamethod */ 285 } 286 287 288 static int arith (lua_State *L, int op, const char *mtname) { 289 if (tonum(L, 1) && tonum(L, 2)) 290 lua_arith(L, op); /* result will be on the top */ 291 else 292 trymt(L, mtname); 293 return 1; 294 } 295 296 297 static int arith_add (lua_State *L) { 298 return arith(L, LUA_OPADD, "__add"); 299 } 300 301 static int arith_sub (lua_State *L) { 302 return arith(L, LUA_OPSUB, "__sub"); 303 } 304 305 static int arith_mul (lua_State *L) { 306 return arith(L, LUA_OPMUL, "__mul"); 307 } 308 309 static int arith_mod (lua_State *L) { 310 return arith(L, LUA_OPMOD, "__mod"); 311 } 312 313 static int arith_pow (lua_State *L) { 314 return arith(L, LUA_OPPOW, "__pow"); 315 } 316 317 static int arith_div (lua_State *L) { 318 return arith(L, LUA_OPDIV, "__div"); 319 } 320 321 static int arith_idiv (lua_State *L) { 322 return arith(L, LUA_OPIDIV, "__idiv"); 323 } 324 325 static int arith_unm (lua_State *L) { 326 return arith(L, LUA_OPUNM, "__unm"); 327 } 328 329 330 static const luaL_Reg stringmetamethods[] = { 331 {"__add", arith_add}, 332 {"__sub", arith_sub}, 333 {"__mul", arith_mul}, 334 {"__mod", arith_mod}, 335 {"__pow", arith_pow}, 336 {"__div", arith_div}, 337 {"__idiv", arith_idiv}, 338 {"__unm", arith_unm}, 339 {"__index", NULL}, /* placeholder */ 340 {NULL, NULL} 341 }; 342 343 #endif /* } */ 344 345 /* }====================================================== */ 346 347 /* 348 ** {====================================================== 349 ** PATTERN MATCHING 350 ** ======================================================= 351 */ 352 353 354 #define CAP_UNFINISHED (-1) 355 #define CAP_POSITION (-2) 356 357 358 typedef struct MatchState { 359 const char *src_init; /* init of source string */ 360 const char *src_end; /* end ('\0') of source string */ 361 const char *p_end; /* end ('\0') of pattern */ 362 lua_State *L; 363 int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 364 unsigned char level; /* total number of captures (finished or unfinished) */ 365 struct { 366 const char *init; 367 ptrdiff_t len; 368 } capture[LUA_MAXCAPTURES]; 369 } MatchState; 370 371 372 /* recursive function */ 373 static const char *match (MatchState *ms, const char *s, const char *p); 374 375 376 /* maximum recursion depth for 'match' */ 377 #if !defined(MAXCCALLS) 378 #define MAXCCALLS 200 379 #endif 380 381 382 #define L_ESC '%' 383 #define SPECIALS "^$*+?.([%-" 384 385 386 static int check_capture (MatchState *ms, int l) { 387 l -= '1'; 388 if (l_unlikely(l < 0 || l >= ms->level || 389 ms->capture[l].len == CAP_UNFINISHED)) 390 return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 391 return l; 392 } 393 394 395 static int capture_to_close (MatchState *ms) { 396 int level = ms->level; 397 for (level--; level>=0; level--) 398 if (ms->capture[level].len == CAP_UNFINISHED) return level; 399 return luaL_error(ms->L, "invalid pattern capture"); 400 } 401 402 403 static const char *classend (MatchState *ms, const char *p) { 404 switch (*p++) { 405 case L_ESC: { 406 if (l_unlikely(p == ms->p_end)) 407 luaL_error(ms->L, "malformed pattern (ends with '%%')"); 408 return p+1; 409 } 410 case '[': { 411 if (*p == '^') p++; 412 do { /* look for a ']' */ 413 if (l_unlikely(p == ms->p_end)) 414 luaL_error(ms->L, "malformed pattern (missing ']')"); 415 if (*(p++) == L_ESC && p < ms->p_end) 416 p++; /* skip escapes (e.g. '%]') */ 417 } while (*p != ']'); 418 return p+1; 419 } 420 default: { 421 return p; 422 } 423 } 424 } 425 426 427 static int match_class (int c, int cl) { 428 int res; 429 switch (tolower(cl)) { 430 case 'a' : res = isalpha(c); break; 431 case 'c' : res = iscntrl(c); break; 432 case 'd' : res = isdigit(c); break; 433 case 'g' : res = isgraph(c); break; 434 case 'l' : res = islower(c); break; 435 case 'p' : res = ispunct(c); break; 436 case 's' : res = isspace(c); break; 437 case 'u' : res = isupper(c); break; 438 case 'w' : res = isalnum(c); break; 439 case 'x' : res = isxdigit(c); break; 440 case 'z' : res = (c == 0); break; /* deprecated option */ 441 default: return (cl == c); 442 } 443 return (islower(cl) ? res : !res); 444 } 445 446 447 static int matchbracketclass (int c, const char *p, const char *ec) { 448 int sig = 1; 449 if (*(p+1) == '^') { 450 sig = 0; 451 p++; /* skip the '^' */ 452 } 453 while (++p < ec) { 454 if (*p == L_ESC) { 455 p++; 456 if (match_class(c, uchar(*p))) 457 return sig; 458 } 459 else if ((*(p+1) == '-') && (p+2 < ec)) { 460 p+=2; 461 if (uchar(*(p-2)) <= c && c <= uchar(*p)) 462 return sig; 463 } 464 else if (uchar(*p) == c) return sig; 465 } 466 return !sig; 467 } 468 469 470 static int singlematch (MatchState *ms, const char *s, const char *p, 471 const char *ep) { 472 if (s >= ms->src_end) 473 return 0; 474 else { 475 int c = uchar(*s); 476 switch (*p) { 477 case '.': return 1; /* matches any char */ 478 case L_ESC: return match_class(c, uchar(*(p+1))); 479 case '[': return matchbracketclass(c, p, ep-1); 480 default: return (uchar(*p) == c); 481 } 482 } 483 } 484 485 486 static const char *matchbalance (MatchState *ms, const char *s, 487 const char *p) { 488 if (l_unlikely(p >= ms->p_end - 1)) 489 luaL_error(ms->L, "malformed pattern (missing arguments to '%%b')"); 490 if (*s != *p) return NULL; 491 else { 492 int b = *p; 493 int e = *(p+1); 494 int cont = 1; 495 while (++s < ms->src_end) { 496 if (*s == e) { 497 if (--cont == 0) return s+1; 498 } 499 else if (*s == b) cont++; 500 } 501 } 502 return NULL; /* string ends out of balance */ 503 } 504 505 506 static const char *max_expand (MatchState *ms, const char *s, 507 const char *p, const char *ep) { 508 ptrdiff_t i = 0; /* counts maximum expand for item */ 509 while (singlematch(ms, s + i, p, ep)) 510 i++; 511 /* keeps trying to match with the maximum repetitions */ 512 while (i>=0) { 513 const char *res = match(ms, (s+i), ep+1); 514 if (res) return res; 515 i--; /* else didn't match; reduce 1 repetition to try again */ 516 } 517 return NULL; 518 } 519 520 521 static const char *min_expand (MatchState *ms, const char *s, 522 const char *p, const char *ep) { 523 for (;;) { 524 const char *res = match(ms, s, ep+1); 525 if (res != NULL) 526 return res; 527 else if (singlematch(ms, s, p, ep)) 528 s++; /* try with one more repetition */ 529 else return NULL; 530 } 531 } 532 533 534 static const char *start_capture (MatchState *ms, const char *s, 535 const char *p, int what) { 536 const char *res; 537 int level = ms->level; 538 if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 539 ms->capture[level].init = s; 540 ms->capture[level].len = what; 541 ms->level = level+1; 542 if ((res=match(ms, s, p)) == NULL) /* match failed? */ 543 ms->level--; /* undo capture */ 544 return res; 545 } 546 547 548 static const char *end_capture (MatchState *ms, const char *s, 549 const char *p) { 550 int l = capture_to_close(ms); 551 const char *res; 552 ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 553 if ((res = match(ms, s, p)) == NULL) /* match failed? */ 554 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 555 return res; 556 } 557 558 559 static const char *match_capture (MatchState *ms, const char *s, int l) { 560 size_t len; 561 l = check_capture(ms, l); 562 len = ms->capture[l].len; 563 if ((size_t)(ms->src_end-s) >= len && 564 memcmp(ms->capture[l].init, s, len) == 0) 565 return s+len; 566 else return NULL; 567 } 568 569 570 static const char *match (MatchState *ms, const char *s, const char *p) { 571 if (l_unlikely(ms->matchdepth-- == 0)) 572 luaL_error(ms->L, "pattern too complex"); 573 init: /* using goto's to optimize tail recursion */ 574 if (p != ms->p_end) { /* end of pattern? */ 575 switch (*p) { 576 case '(': { /* start capture */ 577 if (*(p + 1) == ')') /* position capture? */ 578 s = start_capture(ms, s, p + 2, CAP_POSITION); 579 else 580 s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 581 break; 582 } 583 case ')': { /* end capture */ 584 s = end_capture(ms, s, p + 1); 585 break; 586 } 587 case '$': { 588 if ((p + 1) != ms->p_end) /* is the '$' the last char in pattern? */ 589 goto dflt; /* no; go to default */ 590 s = (s == ms->src_end) ? s : NULL; /* check end of string */ 591 break; 592 } 593 case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 594 switch (*(p + 1)) { 595 case 'b': { /* balanced string? */ 596 s = matchbalance(ms, s, p + 2); 597 if (s != NULL) { 598 p += 4; goto init; /* return match(ms, s, p + 4); */ 599 } /* else fail (s == NULL) */ 600 break; 601 } 602 case 'f': { /* frontier? */ 603 const char *ep; char previous; 604 p += 2; 605 if (l_unlikely(*p != '[')) 606 luaL_error(ms->L, "missing '[' after '%%f' in pattern"); 607 ep = classend(ms, p); /* points to what is next */ 608 previous = (s == ms->src_init) ? '\0' : *(s - 1); 609 if (!matchbracketclass(uchar(previous), p, ep - 1) && 610 matchbracketclass(uchar(*s), p, ep - 1)) { 611 p = ep; goto init; /* return match(ms, s, ep); */ 612 } 613 s = NULL; /* match failed */ 614 break; 615 } 616 case '0': case '1': case '2': case '3': 617 case '4': case '5': case '6': case '7': 618 case '8': case '9': { /* capture results (%0-%9)? */ 619 s = match_capture(ms, s, uchar(*(p + 1))); 620 if (s != NULL) { 621 p += 2; goto init; /* return match(ms, s, p + 2) */ 622 } 623 break; 624 } 625 default: goto dflt; 626 } 627 break; 628 } 629 default: dflt: { /* pattern class plus optional suffix */ 630 const char *ep = classend(ms, p); /* points to optional suffix */ 631 /* does not match at least once? */ 632 if (!singlematch(ms, s, p, ep)) { 633 if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 634 p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 635 } 636 else /* '+' or no suffix */ 637 s = NULL; /* fail */ 638 } 639 else { /* matched once */ 640 switch (*ep) { /* handle optional suffix */ 641 case '?': { /* optional */ 642 const char *res; 643 if ((res = match(ms, s + 1, ep + 1)) != NULL) 644 s = res; 645 else { 646 p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 647 } 648 break; 649 } 650 case '+': /* 1 or more repetitions */ 651 s++; /* 1 match already done */ 652 /* FALLTHROUGH */ 653 case '*': /* 0 or more repetitions */ 654 s = max_expand(ms, s, p, ep); 655 break; 656 case '-': /* 0 or more repetitions (minimum) */ 657 s = min_expand(ms, s, p, ep); 658 break; 659 default: /* no suffix */ 660 s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 661 } 662 } 663 break; 664 } 665 } 666 } 667 ms->matchdepth++; 668 return s; 669 } 670 671 672 673 static const char *lmemfind (const char *s1, size_t l1, 674 const char *s2, size_t l2) { 675 if (l2 == 0) return s1; /* empty strings are everywhere */ 676 else if (l2 > l1) return NULL; /* avoids a negative 'l1' */ 677 else { 678 const char *init; /* to search for a '*s2' inside 's1' */ 679 l2--; /* 1st char will be checked by 'memchr' */ 680 l1 = l1-l2; /* 's2' cannot be found after that */ 681 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 682 init++; /* 1st char is already checked */ 683 if (memcmp(init, s2+1, l2) == 0) 684 return init-1; 685 else { /* correct 'l1' and 's1' to try again */ 686 l1 -= init-s1; 687 s1 = init; 688 } 689 } 690 return NULL; /* not found */ 691 } 692 } 693 694 695 /* 696 ** get information about the i-th capture. If there are no captures 697 ** and 'i==0', return information about the whole match, which 698 ** is the range 's'..'e'. If the capture is a string, return 699 ** its length and put its address in '*cap'. If it is an integer 700 ** (a position), push it on the stack and return CAP_POSITION. 701 */ 702 static size_t get_onecapture (MatchState *ms, int i, const char *s, 703 const char *e, const char **cap) { 704 if (i >= ms->level) { 705 if (l_unlikely(i != 0)) 706 luaL_error(ms->L, "invalid capture index %%%d", i + 1); 707 *cap = s; 708 return e - s; 709 } 710 else { 711 ptrdiff_t capl = ms->capture[i].len; 712 *cap = ms->capture[i].init; 713 if (l_unlikely(capl == CAP_UNFINISHED)) 714 luaL_error(ms->L, "unfinished capture"); 715 else if (capl == CAP_POSITION) 716 lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1); 717 return capl; 718 } 719 } 720 721 722 /* 723 ** Push the i-th capture on the stack. 724 */ 725 static void push_onecapture (MatchState *ms, int i, const char *s, 726 const char *e) { 727 const char *cap; 728 ptrdiff_t l = get_onecapture(ms, i, s, e, &cap); 729 if (l != CAP_POSITION) 730 lua_pushlstring(ms->L, cap, l); 731 /* else position was already pushed */ 732 } 733 734 735 static int push_captures (MatchState *ms, const char *s, const char *e) { 736 int i; 737 int nlevels = (ms->level == 0 && s) ? 1 : ms->level; 738 luaL_checkstack(ms->L, nlevels, "too many captures"); 739 for (i = 0; i < nlevels; i++) 740 push_onecapture(ms, i, s, e); 741 return nlevels; /* number of strings pushed */ 742 } 743 744 745 /* check whether pattern has no special characters */ 746 static int nospecials (const char *p, size_t l) { 747 size_t upto = 0; 748 do { 749 if (strpbrk(p + upto, SPECIALS)) 750 return 0; /* pattern has a special character */ 751 upto += strlen(p + upto) + 1; /* may have more after \0 */ 752 } while (upto <= l); 753 return 1; /* no special chars found */ 754 } 755 756 757 static void prepstate (MatchState *ms, lua_State *L, 758 const char *s, size_t ls, const char *p, size_t lp) { 759 ms->L = L; 760 ms->matchdepth = MAXCCALLS; 761 ms->src_init = s; 762 ms->src_end = s + ls; 763 ms->p_end = p + lp; 764 } 765 766 767 static void reprepstate (MatchState *ms) { 768 ms->level = 0; 769 lua_assert(ms->matchdepth == MAXCCALLS); 770 } 771 772 773 static int str_find_aux (lua_State *L, int find) { 774 size_t ls, lp; 775 const char *s = luaL_checklstring(L, 1, &ls); 776 const char *p = luaL_checklstring(L, 2, &lp); 777 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; 778 if (init > ls) { /* start after string's end? */ 779 luaL_pushfail(L); /* cannot find anything */ 780 return 1; 781 } 782 /* explicit request or no special characters? */ 783 if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 784 /* do a plain search */ 785 const char *s2 = lmemfind(s + init, ls - init, p, lp); 786 if (s2) { 787 lua_pushinteger(L, (s2 - s) + 1); 788 lua_pushinteger(L, (s2 - s) + lp); 789 return 2; 790 } 791 } 792 else { 793 MatchState ms; 794 const char *s1 = s + init; 795 int anchor = (*p == '^'); 796 if (anchor) { 797 p++; lp--; /* skip anchor character */ 798 } 799 prepstate(&ms, L, s, ls, p, lp); 800 do { 801 const char *res; 802 reprepstate(&ms); 803 if ((res=match(&ms, s1, p)) != NULL) { 804 if (find) { 805 lua_pushinteger(L, (s1 - s) + 1); /* start */ 806 lua_pushinteger(L, res - s); /* end */ 807 return push_captures(&ms, NULL, 0) + 2; 808 } 809 else 810 return push_captures(&ms, s1, res); 811 } 812 } while (s1++ < ms.src_end && !anchor); 813 } 814 luaL_pushfail(L); /* not found */ 815 return 1; 816 } 817 818 819 static int str_find (lua_State *L) { 820 return str_find_aux(L, 1); 821 } 822 823 824 static int str_match (lua_State *L) { 825 return str_find_aux(L, 0); 826 } 827 828 829 /* state for 'gmatch' */ 830 typedef struct GMatchState { 831 const char *src; /* current position */ 832 const char *p; /* pattern */ 833 const char *lastmatch; /* end of last match */ 834 MatchState ms; /* match state */ 835 } GMatchState; 836 837 838 static int gmatch_aux (lua_State *L) { 839 GMatchState *gm = (GMatchState *)lua_touserdata(L, lua_upvalueindex(3)); 840 const char *src; 841 gm->ms.L = L; 842 for (src = gm->src; src <= gm->ms.src_end; src++) { 843 const char *e; 844 reprepstate(&gm->ms); 845 if ((e = match(&gm->ms, src, gm->p)) != NULL && e != gm->lastmatch) { 846 gm->src = gm->lastmatch = e; 847 return push_captures(&gm->ms, src, e); 848 } 849 } 850 return 0; /* not found */ 851 } 852 853 854 static int gmatch (lua_State *L) { 855 size_t ls, lp; 856 const char *s = luaL_checklstring(L, 1, &ls); 857 const char *p = luaL_checklstring(L, 2, &lp); 858 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; 859 GMatchState *gm; 860 lua_settop(L, 2); /* keep strings on closure to avoid being collected */ 861 gm = (GMatchState *)lua_newuserdatauv(L, sizeof(GMatchState), 0); 862 if (init > ls) /* start after string's end? */ 863 init = ls + 1; /* avoid overflows in 's + init' */ 864 prepstate(&gm->ms, L, s, ls, p, lp); 865 gm->src = s + init; gm->p = p; gm->lastmatch = NULL; 866 lua_pushcclosure(L, gmatch_aux, 3); 867 return 1; 868 } 869 870 871 static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 872 const char *e) { 873 size_t l; 874 lua_State *L = ms->L; 875 const char *news = lua_tolstring(L, 3, &l); 876 const char *p; 877 while ((p = (char *)memchr(news, L_ESC, l)) != NULL) { 878 luaL_addlstring(b, news, p - news); 879 p++; /* skip ESC */ 880 if (*p == L_ESC) /* '%%' */ 881 luaL_addchar(b, *p); 882 else if (*p == '0') /* '%0' */ 883 luaL_addlstring(b, s, e - s); 884 else if (isdigit(uchar(*p))) { /* '%n' */ 885 const char *cap; 886 ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap); 887 if (resl == CAP_POSITION) 888 luaL_addvalue(b); /* add position to accumulated result */ 889 else 890 luaL_addlstring(b, cap, resl); 891 } 892 else 893 luaL_error(L, "invalid use of '%c' in replacement string", L_ESC); 894 l -= p + 1 - news; 895 news = p + 1; 896 } 897 luaL_addlstring(b, news, l); 898 } 899 900 901 /* 902 ** Add the replacement value to the string buffer 'b'. 903 ** Return true if the original string was changed. (Function calls and 904 ** table indexing resulting in nil or false do not change the subject.) 905 */ 906 static int add_value (MatchState *ms, luaL_Buffer *b, const char *s, 907 const char *e, int tr) { 908 lua_State *L = ms->L; 909 switch (tr) { 910 case LUA_TFUNCTION: { /* call the function */ 911 int n; 912 lua_pushvalue(L, 3); /* push the function */ 913 n = push_captures(ms, s, e); /* all captures as arguments */ 914 lua_call(L, n, 1); /* call it */ 915 break; 916 } 917 case LUA_TTABLE: { /* index the table */ 918 push_onecapture(ms, 0, s, e); /* first capture is the index */ 919 lua_gettable(L, 3); 920 break; 921 } 922 default: { /* LUA_TNUMBER or LUA_TSTRING */ 923 add_s(ms, b, s, e); /* add value to the buffer */ 924 return 1; /* something changed */ 925 } 926 } 927 if (!lua_toboolean(L, -1)) { /* nil or false? */ 928 lua_pop(L, 1); /* remove value */ 929 luaL_addlstring(b, s, e - s); /* keep original text */ 930 return 0; /* no changes */ 931 } 932 else if (l_unlikely(!lua_isstring(L, -1))) 933 return luaL_error(L, "invalid replacement value (a %s)", 934 luaL_typename(L, -1)); 935 else { 936 luaL_addvalue(b); /* add result to accumulator */ 937 return 1; /* something changed */ 938 } 939 } 940 941 942 static int str_gsub (lua_State *L) { 943 size_t srcl, lp; 944 const char *src = luaL_checklstring(L, 1, &srcl); /* subject */ 945 const char *p = luaL_checklstring(L, 2, &lp); /* pattern */ 946 const char *lastmatch = NULL; /* end of last match */ 947 int tr = lua_type(L, 3); /* replacement type */ 948 lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */ 949 int anchor = (*p == '^'); 950 lua_Integer n = 0; /* replacement count */ 951 int changed = 0; /* change flag */ 952 MatchState ms; 953 luaL_Buffer b; 954 luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 955 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 956 "string/function/table"); 957 luaL_buffinit(L, &b); 958 if (anchor) { 959 p++; lp--; /* skip anchor character */ 960 } 961 prepstate(&ms, L, src, srcl, p, lp); 962 while (n < max_s) { 963 const char *e; 964 reprepstate(&ms); /* (re)prepare state for new match */ 965 if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */ 966 n++; 967 changed = add_value(&ms, &b, src, e, tr) | changed; 968 src = lastmatch = e; 969 } 970 else if (src < ms.src_end) /* otherwise, skip one character */ 971 luaL_addchar(&b, *src++); 972 else break; /* end of subject */ 973 if (anchor) break; 974 } 975 if (!changed) /* no changes? */ 976 lua_pushvalue(L, 1); /* return original string */ 977 else { /* something changed */ 978 luaL_addlstring(&b, src, ms.src_end-src); 979 luaL_pushresult(&b); /* create and return new string */ 980 } 981 lua_pushinteger(L, n); /* number of substitutions */ 982 return 2; 983 } 984 985 /* }====================================================== */ 986 987 988 989 /* 990 ** {====================================================== 991 ** STRING FORMAT 992 ** ======================================================= 993 */ 994 995 #if !defined(lua_number2strx) /* { */ 996 997 /* 998 ** Hexadecimal floating-point formatter 999 */ 1000 1001 #define SIZELENMOD (sizeof(LUA_NUMBER_FRMLEN)/sizeof(char)) 1002 1003 1004 /* 1005 ** Number of bits that goes into the first digit. It can be any value 1006 ** between 1 and 4; the following definition tries to align the number 1007 ** to nibble boundaries by making what is left after that first digit a 1008 ** multiple of 4. 1009 */ 1010 #define L_NBFD ((l_floatatt(MANT_DIG) - 1)%4 + 1) 1011 1012 1013 /* 1014 ** Add integer part of 'x' to buffer and return new 'x' 1015 */ 1016 static lua_Number adddigit (char *buff, int n, lua_Number x) { 1017 lua_Number dd = l_mathop(floor)(x); /* get integer part from 'x' */ 1018 int d = (int)dd; 1019 buff[n] = (d < 10 ? d + '0' : d - 10 + 'a'); /* add to buffer */ 1020 return x - dd; /* return what is left */ 1021 } 1022 1023 1024 static int num2straux (char *buff, int sz, lua_Number x) { 1025 /* if 'inf' or 'NaN', format it like '%g' */ 1026 if (x != x || x == (lua_Number)HUGE_VAL || x == -(lua_Number)HUGE_VAL) 1027 return l_sprintf(buff, sz, LUA_NUMBER_FMT, (LUAI_UACNUMBER)x); 1028 else if (x == 0) { /* can be -0... */ 1029 /* create "0" or "-0" followed by exponent */ 1030 return l_sprintf(buff, sz, LUA_NUMBER_FMT "x0p+0", (LUAI_UACNUMBER)x); 1031 } 1032 else { 1033 int e; 1034 lua_Number m = l_mathop(frexp)(x, &e); /* 'x' fraction and exponent */ 1035 int n = 0; /* character count */ 1036 if (m < 0) { /* is number negative? */ 1037 buff[n++] = '-'; /* add sign */ 1038 m = -m; /* make it positive */ 1039 } 1040 buff[n++] = '0'; buff[n++] = 'x'; /* add "0x" */ 1041 m = adddigit(buff, n++, m * (1 << L_NBFD)); /* add first digit */ 1042 e -= L_NBFD; /* this digit goes before the radix point */ 1043 if (m > 0) { /* more digits? */ 1044 buff[n++] = lua_getlocaledecpoint(); /* add radix point */ 1045 do { /* add as many digits as needed */ 1046 m = adddigit(buff, n++, m * 16); 1047 } while (m > 0); 1048 } 1049 n += l_sprintf(buff + n, sz - n, "p%+d", e); /* add exponent */ 1050 lua_assert(n < sz); 1051 return n; 1052 } 1053 } 1054 1055 1056 static int lua_number2strx (lua_State *L, char *buff, int sz, 1057 const char *fmt, lua_Number x) { 1058 int n = num2straux(buff, sz, x); 1059 if (fmt[SIZELENMOD] == 'A') { 1060 int i; 1061 for (i = 0; i < n; i++) 1062 buff[i] = toupper(uchar(buff[i])); 1063 } 1064 else if (l_unlikely(fmt[SIZELENMOD] != 'a')) 1065 return luaL_error(L, "modifiers for format '%%a'/'%%A' not implemented"); 1066 return n; 1067 } 1068 1069 #endif /* } */ 1070 1071 1072 /* 1073 ** Maximum size for items formatted with '%f'. This size is produced 1074 ** by format('%.99f', -maxfloat), and is equal to 99 + 3 ('-', '.', 1075 ** and '\0') + number of decimal digits to represent maxfloat (which 1076 ** is maximum exponent + 1). (99+3+1, adding some extra, 110) 1077 */ 1078 #define MAX_ITEMF (110 + l_floatatt(MAX_10_EXP)) 1079 1080 1081 /* 1082 ** All formats except '%f' do not need that large limit. The other 1083 ** float formats use exponents, so that they fit in the 99 limit for 1084 ** significant digits; 's' for large strings and 'q' add items directly 1085 ** to the buffer; all integer formats also fit in the 99 limit. The 1086 ** worst case are floats: they may need 99 significant digits, plus 1087 ** '0x', '-', '.', 'e+XXXX', and '\0'. Adding some extra, 120. 1088 */ 1089 #define MAX_ITEM 120 1090 1091 1092 /* valid flags in a format specification */ 1093 #if !defined(L_FMTFLAGSF) 1094 1095 /* valid flags for a, A, e, E, f, F, g, and G conversions */ 1096 #define L_FMTFLAGSF "-+#0 " 1097 1098 /* valid flags for o, x, and X conversions */ 1099 #define L_FMTFLAGSX "-#0" 1100 1101 /* valid flags for d and i conversions */ 1102 #define L_FMTFLAGSI "-+0 " 1103 1104 /* valid flags for u conversions */ 1105 #define L_FMTFLAGSU "-0" 1106 1107 /* valid flags for c, p, and s conversions */ 1108 #define L_FMTFLAGSC "-" 1109 1110 #endif 1111 1112 1113 /* 1114 ** Maximum size of each format specification (such as "%-099.99d"): 1115 ** Initial '%', flags (up to 5), width (2), period, precision (2), 1116 ** length modifier (8), conversion specifier, and final '\0', plus some 1117 ** extra. 1118 */ 1119 #define MAX_FORMAT 32 1120 1121 1122 static void addquoted (luaL_Buffer *b, const char *s, size_t len) { 1123 luaL_addchar(b, '"'); 1124 while (len--) { 1125 if (*s == '"' || *s == '\\' || *s == '\n') { 1126 luaL_addchar(b, '\\'); 1127 luaL_addchar(b, *s); 1128 } 1129 else if (iscntrl(uchar(*s))) { 1130 char buff[10]; 1131 if (!isdigit(uchar(*(s+1)))) 1132 l_sprintf(buff, sizeof(buff), "\\%d", (int)uchar(*s)); 1133 else 1134 l_sprintf(buff, sizeof(buff), "\\%03d", (int)uchar(*s)); 1135 luaL_addstring(b, buff); 1136 } 1137 else 1138 luaL_addchar(b, *s); 1139 s++; 1140 } 1141 luaL_addchar(b, '"'); 1142 } 1143 1144 1145 #ifndef LUA_AVOID_FLOAT 1146 /* 1147 ** Serialize a floating-point number in such a way that it can be 1148 ** scanned back by Lua. Use hexadecimal format for "common" numbers 1149 ** (to preserve precision); inf, -inf, and NaN are handled separately. 1150 ** (NaN cannot be expressed as a numeral, so we write '(0/0)' for it.) 1151 */ 1152 static int quotefloat (lua_State *L, char *buff, lua_Number n) { 1153 const char *s; /* for the fixed representations */ 1154 if (n == (lua_Number)HUGE_VAL) /* inf? */ 1155 s = "1e9999"; 1156 else if (n == -(lua_Number)HUGE_VAL) /* -inf? */ 1157 s = "-1e9999"; 1158 else if (n != n) /* NaN? */ 1159 s = "(0/0)"; 1160 else { /* format number as hexadecimal */ 1161 int nb = lua_number2strx(L, buff, MAX_ITEM, 1162 "%" LUA_NUMBER_FRMLEN "a", n); 1163 /* ensures that 'buff' string uses a dot as the radix character */ 1164 if (memchr(buff, '.', nb) == NULL) { /* no dot? */ 1165 char point = lua_getlocaledecpoint(); /* try locale point */ 1166 char *ppoint = (char *)memchr(buff, point, nb); 1167 if (ppoint) *ppoint = '.'; /* change it to a dot */ 1168 } 1169 return nb; 1170 } 1171 /* for the fixed representations */ 1172 return l_sprintf(buff, MAX_ITEM, "%s", s); 1173 } 1174 #endif 1175 1176 1177 static void addliteral (lua_State *L, luaL_Buffer *b, int arg) { 1178 switch (lua_type(L, arg)) { 1179 case LUA_TSTRING: { 1180 size_t len; 1181 const char *s = lua_tolstring(L, arg, &len); 1182 addquoted(b, s, len); 1183 break; 1184 } 1185 case LUA_TNUMBER: { 1186 char *buff = luaL_prepbuffsize(b, MAX_ITEM); 1187 int nb; 1188 #ifndef LUA_AVOID_FLOAT 1189 if (!lua_isinteger(L, arg)) /* float? */ 1190 nb = quotefloat(L, buff, lua_tonumber(L, arg)); 1191 else { /* integers */ 1192 #else 1193 { 1194 #endif 1195 lua_Integer n = lua_tointeger(L, arg); 1196 const char *format = (n == LUA_MININTEGER) /* corner case? */ 1197 ? "0x%" LUA_INTEGER_FRMLEN "x" /* use hex */ 1198 : LUA_INTEGER_FMT; /* else use default format */ 1199 nb = l_sprintf(buff, MAX_ITEM, format, (LUAI_UACINT)n); 1200 } 1201 luaL_addsize(b, nb); 1202 break; 1203 } 1204 case LUA_TNIL: case LUA_TBOOLEAN: { 1205 luaL_tolstring(L, arg, NULL); 1206 luaL_addvalue(b); 1207 break; 1208 } 1209 default: { 1210 luaL_argerror(L, arg, "value has no literal form"); 1211 } 1212 } 1213 } 1214 1215 1216 static const char *get2digits (const char *s) { 1217 if (isdigit(uchar(*s))) { 1218 s++; 1219 if (isdigit(uchar(*s))) s++; /* (2 digits at most) */ 1220 } 1221 return s; 1222 } 1223 1224 1225 /* 1226 ** Check whether a conversion specification is valid. When called, 1227 ** first character in 'form' must be '%' and last character must 1228 ** be a valid conversion specifier. 'flags' are the accepted flags; 1229 ** 'precision' signals whether to accept a precision. 1230 */ 1231 static void checkformat (lua_State *L, const char *form, const char *flags, 1232 int precision) { 1233 const char *spec = form + 1; /* skip '%' */ 1234 spec += strspn(spec, flags); /* skip flags */ 1235 if (*spec != '0') { /* a width cannot start with '0' */ 1236 spec = get2digits(spec); /* skip width */ 1237 if (*spec == '.' && precision) { 1238 spec++; 1239 spec = get2digits(spec); /* skip precision */ 1240 } 1241 } 1242 if (!isalpha(uchar(*spec))) /* did not go to the end? */ 1243 luaL_error(L, "invalid conversion specification: '%s'", form); 1244 } 1245 1246 1247 /* 1248 ** Get a conversion specification and copy it to 'form'. 1249 ** Return the address of its last character. 1250 */ 1251 static const char *getformat (lua_State *L, const char *strfrmt, 1252 char *form) { 1253 /* spans flags, width, and precision ('0' is included as a flag) */ 1254 size_t len = strspn(strfrmt, L_FMTFLAGSF "123456789."); 1255 len++; /* adds following character (should be the specifier) */ 1256 /* still needs space for '%', '\0', plus a length modifier */ 1257 if (len >= MAX_FORMAT - 10) 1258 luaL_error(L, "invalid format (too long)"); 1259 *(form++) = '%'; 1260 memcpy(form, strfrmt, len * sizeof(char)); 1261 *(form + len) = '\0'; 1262 return strfrmt + len - 1; 1263 } 1264 1265 1266 /* 1267 ** add length modifier into formats 1268 */ 1269 static void addlenmod (char *form, const char *lenmod) { 1270 size_t l = strlen(form); 1271 size_t lm = strlen(lenmod); 1272 char spec = form[l - 1]; 1273 strcpy(form + l - 1, lenmod); 1274 form[l + lm - 1] = spec; 1275 form[l + lm] = '\0'; 1276 } 1277 1278 1279 static int str_format (lua_State *L) { 1280 int top = lua_gettop(L); 1281 int arg = 1; 1282 size_t sfl; 1283 const char *strfrmt = luaL_checklstring(L, arg, &sfl); 1284 const char *strfrmt_end = strfrmt+sfl; 1285 const char *flags; 1286 luaL_Buffer b; 1287 luaL_buffinit(L, &b); 1288 while (strfrmt < strfrmt_end) { 1289 if (*strfrmt != L_ESC) 1290 luaL_addchar(&b, *strfrmt++); 1291 else if (*++strfrmt == L_ESC) 1292 luaL_addchar(&b, *strfrmt++); /* %% */ 1293 else { /* format item */ 1294 char form[MAX_FORMAT]; /* to store the format ('%...') */ 1295 int maxitem = MAX_ITEM; /* maximum length for the result */ 1296 char *buff = luaL_prepbuffsize(&b, maxitem); /* to put result */ 1297 int nb = 0; /* number of bytes in result */ 1298 if (++arg > top) 1299 return luaL_argerror(L, arg, "no value"); 1300 strfrmt = getformat(L, strfrmt, form); 1301 switch (*strfrmt++) { 1302 case 'c': { 1303 checkformat(L, form, L_FMTFLAGSC, 0); 1304 nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg)); 1305 break; 1306 } 1307 case 'd': case 'i': 1308 flags = L_FMTFLAGSI; 1309 goto intcase; 1310 case 'u': 1311 flags = L_FMTFLAGSU; 1312 goto intcase; 1313 case 'o': case 'x': case 'X': 1314 flags = L_FMTFLAGSX; 1315 intcase: { 1316 lua_Integer n = luaL_checkinteger(L, arg); 1317 checkformat(L, form, flags, 1); 1318 addlenmod(form, LUA_INTEGER_FRMLEN); 1319 nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n); 1320 break; 1321 } 1322 case 'a': case 'A': 1323 checkformat(L, form, L_FMTFLAGSF, 1); 1324 addlenmod(form, LUA_NUMBER_FRMLEN); 1325 nb = lua_number2strx(L, buff, maxitem, form, 1326 luaL_checknumber(L, arg)); 1327 break; 1328 #ifndef LUA_AVOID_FLOAT 1329 case 'f': 1330 maxitem = MAX_ITEMF; /* extra space for '%f' */ 1331 buff = luaL_prepbuffsize(&b, maxitem); 1332 /* FALLTHROUGH */ 1333 #endif 1334 case 'e': case 'E': case 'g': case 'G': { 1335 lua_Number n = luaL_checknumber(L, arg); 1336 checkformat(L, form, L_FMTFLAGSF, 1); 1337 addlenmod(form, LUA_NUMBER_FRMLEN); 1338 nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n); 1339 break; 1340 } 1341 case 'p': { 1342 const void *p = lua_topointer(L, arg); 1343 checkformat(L, form, L_FMTFLAGSC, 0); 1344 if (p == NULL) { /* avoid calling 'printf' with argument NULL */ 1345 p = "(null)"; /* result */ 1346 form[strlen(form) - 1] = 's'; /* format it as a string */ 1347 } 1348 nb = l_sprintf(buff, maxitem, form, p); 1349 break; 1350 } 1351 case 'q': { 1352 if (form[2] != '\0') /* modifiers? */ 1353 return luaL_error(L, "specifier '%%q' cannot have modifiers"); 1354 addliteral(L, &b, arg); 1355 break; 1356 } 1357 case 's': { 1358 size_t l; 1359 const char *s = luaL_tolstring(L, arg, &l); 1360 if (form[2] == '\0') /* no modifiers? */ 1361 luaL_addvalue(&b); /* keep entire string */ 1362 else { 1363 luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); 1364 checkformat(L, form, L_FMTFLAGSC, 1); 1365 if (strchr(form, '.') == NULL && l >= 100) { 1366 /* no precision and string is too long to be formatted */ 1367 luaL_addvalue(&b); /* keep entire string */ 1368 } 1369 else { /* format the string into 'buff' */ 1370 nb = l_sprintf(buff, maxitem, form, s); 1371 lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ 1372 } 1373 } 1374 break; 1375 } 1376 default: { /* also treat cases 'pnLlh' */ 1377 return luaL_error(L, "invalid conversion '%s' to 'format'", form); 1378 } 1379 } 1380 lua_assert(nb < maxitem); 1381 luaL_addsize(&b, nb); 1382 } 1383 } 1384 luaL_pushresult(&b); 1385 return 1; 1386 } 1387 1388 /* }====================================================== */ 1389 1390 1391 /* 1392 ** {====================================================== 1393 ** PACK/UNPACK 1394 ** ======================================================= 1395 */ 1396 1397 1398 /* value used for padding */ 1399 #if !defined(LUAL_PACKPADBYTE) 1400 #define LUAL_PACKPADBYTE 0x00 1401 #endif 1402 1403 /* maximum size for the binary representation of an integer */ 1404 #define MAXINTSIZE 16 1405 1406 /* number of bits in a character */ 1407 #define NB CHAR_BIT 1408 1409 /* mask for one character (NB 1's) */ 1410 #define MC ((1 << NB) - 1) 1411 1412 /* size of a lua_Integer */ 1413 #define SZINT ((int)sizeof(lua_Integer)) 1414 1415 1416 /* dummy union to get native endianness */ 1417 static const union { 1418 int dummy; 1419 char little; /* true iff machine is little endian */ 1420 } nativeendian = {1}; 1421 1422 1423 /* 1424 ** information to pack/unpack stuff 1425 */ 1426 typedef struct Header { 1427 lua_State *L; 1428 int islittle; 1429 int maxalign; 1430 } Header; 1431 1432 1433 /* 1434 ** options for pack/unpack 1435 */ 1436 typedef enum KOption { 1437 Kint, /* signed integers */ 1438 Kuint, /* unsigned integers */ 1439 #ifndef LUA_AVOID_FLOAT 1440 Kfloat, /* single-precision floating-point numbers */ 1441 Kdouble, /* double-precision floating-point numbers */ 1442 #endif 1443 Knumber, /* Lua "native" floating-point numbers */ 1444 Kchar, /* fixed-length strings */ 1445 Kstring, /* strings with prefixed length */ 1446 Kzstr, /* zero-terminated strings */ 1447 Kpadding, /* padding */ 1448 Kpaddalign, /* padding for alignment */ 1449 Knop /* no-op (configuration or spaces) */ 1450 } KOption; 1451 1452 1453 /* 1454 ** Read an integer numeral from string 'fmt' or return 'df' if 1455 ** there is no numeral 1456 */ 1457 static int digit (int c) { return '0' <= c && c <= '9'; } 1458 1459 static int getnum (const char **fmt, int df) { 1460 if (!digit(**fmt)) /* no number? */ 1461 return df; /* return default value */ 1462 else { 1463 int a = 0; 1464 do { 1465 a = a*10 + (*((*fmt)++) - '0'); 1466 } while (digit(**fmt) && a <= ((int)MAXSIZE - 9)/10); 1467 return a; 1468 } 1469 } 1470 1471 1472 /* 1473 ** Read an integer numeral and raises an error if it is larger 1474 ** than the maximum size for integers. 1475 */ 1476 static int getnumlimit (Header *h, const char **fmt, int df) { 1477 int sz = getnum(fmt, df); 1478 if (l_unlikely(sz > MAXINTSIZE || sz <= 0)) 1479 return luaL_error(h->L, "integral size (%d) out of limits [1,%d]", 1480 sz, MAXINTSIZE); 1481 return sz; 1482 } 1483 1484 1485 /* 1486 ** Initialize Header 1487 */ 1488 static void initheader (lua_State *L, Header *h) { 1489 h->L = L; 1490 h->islittle = nativeendian.little; 1491 h->maxalign = 1; 1492 } 1493 1494 1495 /* 1496 ** Read and classify next option. 'size' is filled with option's size. 1497 */ 1498 static KOption getoption (Header *h, const char **fmt, int *size) { 1499 /* dummy structure to get native alignment requirements */ 1500 struct cD { char c; union { LUAI_MAXALIGN; } u; }; 1501 int opt = *((*fmt)++); 1502 *size = 0; /* default */ 1503 switch (opt) { 1504 case 'b': *size = sizeof(char); return Kint; 1505 case 'B': *size = sizeof(char); return Kuint; 1506 case 'h': *size = sizeof(short); return Kint; 1507 case 'H': *size = sizeof(short); return Kuint; 1508 case 'l': *size = sizeof(long); return Kint; 1509 case 'L': *size = sizeof(long); return Kuint; 1510 case 'j': *size = sizeof(lua_Integer); return Kint; 1511 case 'J': *size = sizeof(lua_Integer); return Kuint; 1512 case 'T': *size = sizeof(size_t); return Kuint; 1513 #ifndef LUA_AVOID_FLOAT 1514 case 'f': *size = sizeof(float); return Kfloat; 1515 case 'd': *size = sizeof(double); return Kdouble; 1516 #endif 1517 case 'n': *size = sizeof(lua_Number); return Knumber; 1518 case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint; 1519 case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint; 1520 case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring; 1521 case 'c': 1522 *size = getnum(fmt, -1); 1523 if (l_unlikely(*size == -1)) 1524 luaL_error(h->L, "missing size for format option 'c'"); 1525 return Kchar; 1526 case 'z': return Kzstr; 1527 case 'x': *size = 1; return Kpadding; 1528 case 'X': return Kpaddalign; 1529 case ' ': break; 1530 case '<': h->islittle = 1; break; 1531 case '>': h->islittle = 0; break; 1532 case '=': h->islittle = nativeendian.little; break; 1533 case '!': { 1534 const int maxalign = offsetof(struct cD, u); 1535 h->maxalign = getnumlimit(h, fmt, maxalign); 1536 break; 1537 } 1538 default: luaL_error(h->L, "invalid format option '%c'", opt); 1539 } 1540 return Knop; 1541 } 1542 1543 1544 /* 1545 ** Read, classify, and fill other details about the next option. 1546 ** 'psize' is filled with option's size, 'notoalign' with its 1547 ** alignment requirements. 1548 ** Local variable 'size' gets the size to be aligned. (Kpadal option 1549 ** always gets its full alignment, other options are limited by 1550 ** the maximum alignment ('maxalign'). Kchar option needs no alignment 1551 ** despite its size. 1552 */ 1553 static KOption getdetails (Header *h, size_t totalsize, 1554 const char **fmt, int *psize, int *ntoalign) { 1555 KOption opt = getoption(h, fmt, psize); 1556 int align = *psize; /* usually, alignment follows size */ 1557 if (opt == Kpaddalign) { /* 'X' gets alignment from following option */ 1558 if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0) 1559 luaL_argerror(h->L, 1, "invalid next option for option 'X'"); 1560 } 1561 if (align <= 1 || opt == Kchar) /* need no alignment? */ 1562 *ntoalign = 0; 1563 else { 1564 if (align > h->maxalign) /* enforce maximum alignment */ 1565 align = h->maxalign; 1566 if (l_unlikely((align & (align - 1)) != 0)) /* not a power of 2? */ 1567 luaL_argerror(h->L, 1, "format asks for alignment not power of 2"); 1568 *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1); 1569 } 1570 return opt; 1571 } 1572 1573 1574 /* 1575 ** Pack integer 'n' with 'size' bytes and 'islittle' endianness. 1576 ** The final 'if' handles the case when 'size' is larger than 1577 ** the size of a Lua integer, correcting the extra sign-extension 1578 ** bytes if necessary (by default they would be zeros). 1579 */ 1580 static void packint (luaL_Buffer *b, lua_Unsigned n, 1581 int islittle, int size, int neg) { 1582 char *buff = luaL_prepbuffsize(b, size); 1583 int i; 1584 buff[islittle ? 0 : size - 1] = (char)(n & MC); /* first byte */ 1585 for (i = 1; i < size; i++) { 1586 n >>= NB; 1587 buff[islittle ? i : size - 1 - i] = (char)(n & MC); 1588 } 1589 if (neg && size > SZINT) { /* negative number need sign extension? */ 1590 for (i = SZINT; i < size; i++) /* correct extra bytes */ 1591 buff[islittle ? i : size - 1 - i] = (char)MC; 1592 } 1593 luaL_addsize(b, size); /* add result to buffer */ 1594 } 1595 1596 1597 /* 1598 ** Copy 'size' bytes from 'src' to 'dest', correcting endianness if 1599 ** given 'islittle' is different from native endianness. 1600 */ 1601 static void copywithendian (char *dest, const char *src, 1602 int size, int islittle) { 1603 if (islittle == nativeendian.little) 1604 memcpy(dest, src, size); 1605 else { 1606 dest += size - 1; 1607 while (size-- != 0) 1608 *(dest--) = *(src++); 1609 } 1610 } 1611 1612 1613 static int str_pack (lua_State *L) { 1614 luaL_Buffer b; 1615 Header h; 1616 const char *fmt = luaL_checkstring(L, 1); /* format string */ 1617 int arg = 1; /* current argument to pack */ 1618 size_t totalsize = 0; /* accumulate total size of result */ 1619 initheader(L, &h); 1620 lua_pushnil(L); /* mark to separate arguments from string buffer */ 1621 luaL_buffinit(L, &b); 1622 while (*fmt != '\0') { 1623 int size, ntoalign; 1624 KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1625 totalsize += ntoalign + size; 1626 while (ntoalign-- > 0) 1627 luaL_addchar(&b, LUAL_PACKPADBYTE); /* fill alignment */ 1628 arg++; 1629 switch (opt) { 1630 case Kint: { /* signed integers */ 1631 lua_Integer n = luaL_checkinteger(L, arg); 1632 if (size < SZINT) { /* need overflow check? */ 1633 lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1); 1634 luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow"); 1635 } 1636 packint(&b, (lua_Unsigned)n, h.islittle, size, (n < 0)); 1637 break; 1638 } 1639 case Kuint: { /* unsigned integers */ 1640 lua_Integer n = luaL_checkinteger(L, arg); 1641 if (size < SZINT) /* need overflow check? */ 1642 luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)), 1643 arg, "unsigned overflow"); 1644 packint(&b, (lua_Unsigned)n, h.islittle, size, 0); 1645 break; 1646 } 1647 #ifndef LUA_AVOID_FLOAT 1648 case Kfloat: { /* C float */ 1649 float f = (float)luaL_checknumber(L, arg); /* get argument */ 1650 char *buff = luaL_prepbuffsize(&b, sizeof(f)); 1651 /* move 'f' to final result, correcting endianness if needed */ 1652 copywithendian(buff, (char *)&f, sizeof(f), h.islittle); 1653 luaL_addsize(&b, size); 1654 break; 1655 } 1656 #endif 1657 case Knumber: { /* Lua float */ 1658 lua_Number f = luaL_checknumber(L, arg); /* get argument */ 1659 char *buff = luaL_prepbuffsize(&b, sizeof(f)); 1660 /* move 'f' to final result, correcting endianness if needed */ 1661 copywithendian(buff, (char *)&f, sizeof(f), h.islittle); 1662 luaL_addsize(&b, size); 1663 break; 1664 } 1665 #ifndef LUA_AVOID_FLOAT 1666 case Kdouble: { /* C double */ 1667 double f = (double)luaL_checknumber(L, arg); /* get argument */ 1668 char *buff = luaL_prepbuffsize(&b, sizeof(f)); 1669 /* move 'f' to final result, correcting endianness if needed */ 1670 copywithendian(buff, (char *)&f, sizeof(f), h.islittle); 1671 luaL_addsize(&b, size); 1672 break; 1673 } 1674 #endif 1675 case Kchar: { /* fixed-size string */ 1676 size_t len; 1677 const char *s = luaL_checklstring(L, arg, &len); 1678 luaL_argcheck(L, len <= (size_t)size, arg, 1679 "string longer than given size"); 1680 luaL_addlstring(&b, s, len); /* add string */ 1681 while (len++ < (size_t)size) /* pad extra space */ 1682 luaL_addchar(&b, LUAL_PACKPADBYTE); 1683 break; 1684 } 1685 case Kstring: { /* strings with length count */ 1686 size_t len; 1687 const char *s = luaL_checklstring(L, arg, &len); 1688 luaL_argcheck(L, size >= (int)sizeof(size_t) || 1689 len < ((size_t)1 << (size * NB)), 1690 arg, "string length does not fit in given size"); 1691 packint(&b, (lua_Unsigned)len, h.islittle, size, 0); /* pack length */ 1692 luaL_addlstring(&b, s, len); 1693 totalsize += len; 1694 break; 1695 } 1696 case Kzstr: { /* zero-terminated string */ 1697 size_t len; 1698 const char *s = luaL_checklstring(L, arg, &len); 1699 luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros"); 1700 luaL_addlstring(&b, s, len); 1701 luaL_addchar(&b, '\0'); /* add zero at the end */ 1702 totalsize += len + 1; 1703 break; 1704 } 1705 case Kpadding: luaL_addchar(&b, LUAL_PACKPADBYTE); /* FALLTHROUGH */ 1706 case Kpaddalign: case Knop: 1707 arg--; /* undo increment */ 1708 break; 1709 } 1710 } 1711 luaL_pushresult(&b); 1712 return 1; 1713 } 1714 1715 1716 static int str_packsize (lua_State *L) { 1717 Header h; 1718 const char *fmt = luaL_checkstring(L, 1); /* format string */ 1719 size_t totalsize = 0; /* accumulate total size of result */ 1720 initheader(L, &h); 1721 while (*fmt != '\0') { 1722 int size, ntoalign; 1723 KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1724 luaL_argcheck(L, opt != Kstring && opt != Kzstr, 1, 1725 "variable-length format"); 1726 size += ntoalign; /* total space used by option */ 1727 luaL_argcheck(L, totalsize <= MAXSIZE - size, 1, 1728 "format result too large"); 1729 totalsize += size; 1730 } 1731 lua_pushinteger(L, (lua_Integer)totalsize); 1732 return 1; 1733 } 1734 1735 1736 /* 1737 ** Unpack an integer with 'size' bytes and 'islittle' endianness. 1738 ** If size is smaller than the size of a Lua integer and integer 1739 ** is signed, must do sign extension (propagating the sign to the 1740 ** higher bits); if size is larger than the size of a Lua integer, 1741 ** it must check the unread bytes to see whether they do not cause an 1742 ** overflow. 1743 */ 1744 static lua_Integer unpackint (lua_State *L, const char *str, 1745 int islittle, int size, int issigned) { 1746 lua_Unsigned res = 0; 1747 int i; 1748 int limit = (size <= SZINT) ? size : SZINT; 1749 for (i = limit - 1; i >= 0; i--) { 1750 res <<= NB; 1751 res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i]; 1752 } 1753 if (size < SZINT) { /* real size smaller than lua_Integer? */ 1754 if (issigned) { /* needs sign extension? */ 1755 lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1); 1756 res = ((res ^ mask) - mask); /* do sign extension */ 1757 } 1758 } 1759 else if (size > SZINT) { /* must check unread bytes */ 1760 int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC; 1761 for (i = limit; i < size; i++) { 1762 if (l_unlikely((unsigned char)str[islittle ? i : size - 1 - i] != mask)) 1763 luaL_error(L, "%d-byte integer does not fit into Lua Integer", size); 1764 } 1765 } 1766 return (lua_Integer)res; 1767 } 1768 1769 1770 static int str_unpack (lua_State *L) { 1771 Header h; 1772 const char *fmt = luaL_checkstring(L, 1); 1773 size_t ld; 1774 const char *data = luaL_checklstring(L, 2, &ld); 1775 size_t pos = posrelatI(luaL_optinteger(L, 3, 1), ld) - 1; 1776 int n = 0; /* number of results */ 1777 luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); 1778 initheader(L, &h); 1779 while (*fmt != '\0') { 1780 int size, ntoalign; 1781 KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign); 1782 luaL_argcheck(L, (size_t)ntoalign + size <= ld - pos, 2, 1783 "data string too short"); 1784 pos += ntoalign; /* skip alignment */ 1785 /* stack space for item + next position */ 1786 luaL_checkstack(L, 2, "too many results"); 1787 n++; 1788 switch (opt) { 1789 case Kint: 1790 case Kuint: { 1791 lua_Integer res = unpackint(L, data + pos, h.islittle, size, 1792 (opt == Kint)); 1793 lua_pushinteger(L, res); 1794 break; 1795 } 1796 #ifndef LUA_AVOID_FLOAT 1797 case Kfloat: { 1798 float f; 1799 copywithendian((char *)&f, data + pos, sizeof(f), h.islittle); 1800 lua_pushnumber(L, (lua_Number)f); 1801 break; 1802 } 1803 #endif 1804 case Knumber: { 1805 lua_Number f; 1806 copywithendian((char *)&f, data + pos, sizeof(f), h.islittle); 1807 lua_pushnumber(L, f); 1808 break; 1809 } 1810 #ifndef LUA_AVOID_FLOAT 1811 case Kdouble: { 1812 double f; 1813 copywithendian((char *)&f, data + pos, sizeof(f), h.islittle); 1814 lua_pushnumber(L, (lua_Number)f); 1815 break; 1816 } 1817 #endif 1818 case Kchar: { 1819 lua_pushlstring(L, data + pos, size); 1820 break; 1821 } 1822 case Kstring: { 1823 size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0); 1824 luaL_argcheck(L, len <= ld - pos - size, 2, "data string too short"); 1825 lua_pushlstring(L, data + pos + size, len); 1826 pos += len; /* skip string */ 1827 break; 1828 } 1829 case Kzstr: { 1830 size_t len = strlen(data + pos); 1831 luaL_argcheck(L, pos + len < ld, 2, 1832 "unfinished string for format 'z'"); 1833 lua_pushlstring(L, data + pos, len); 1834 pos += len + 1; /* skip string plus final '\0' */ 1835 break; 1836 } 1837 case Kpaddalign: case Kpadding: case Knop: 1838 n--; /* undo increment */ 1839 break; 1840 } 1841 pos += size; 1842 } 1843 lua_pushinteger(L, pos + 1); /* next position */ 1844 return n + 1; 1845 } 1846 1847 /* }====================================================== */ 1848 1849 1850 static const luaL_Reg strlib[] = { 1851 {"byte", str_byte}, 1852 {"char", str_char}, 1853 {"dump", str_dump}, 1854 {"find", str_find}, 1855 {"format", str_format}, 1856 {"gmatch", gmatch}, 1857 {"gsub", str_gsub}, 1858 {"len", str_len}, 1859 {"lower", str_lower}, 1860 {"match", str_match}, 1861 {"rep", str_rep}, 1862 {"reverse", str_reverse}, 1863 {"sub", str_sub}, 1864 {"upper", str_upper}, 1865 {"pack", str_pack}, 1866 {"packsize", str_packsize}, 1867 {"unpack", str_unpack}, 1868 {NULL, NULL} 1869 }; 1870 1871 1872 static void createmetatable (lua_State *L) { 1873 /* table to be metatable for strings */ 1874 luaL_newlibtable(L, stringmetamethods); 1875 luaL_setfuncs(L, stringmetamethods, 0); 1876 lua_pushliteral(L, ""); /* dummy string */ 1877 lua_pushvalue(L, -2); /* copy table */ 1878 lua_setmetatable(L, -2); /* set table as metatable for strings */ 1879 lua_pop(L, 1); /* pop dummy string */ 1880 lua_pushvalue(L, -2); /* get string library */ 1881 lua_setfield(L, -2, "__index"); /* metatable.__index = string */ 1882 lua_pop(L, 1); /* pop metatable */ 1883 } 1884 1885 1886 /* 1887 ** Open string library 1888 */ 1889 LUAMOD_API int luaopen_string (lua_State *L) { 1890 luaL_newlib(L, strlib); 1891 createmetatable(L); 1892 return 1; 1893 } 1894 1895