1 /* 2 ** $Id: lstrlib.c $ 3 ** Standard library for string operations and pattern-matching 4 ** See Copyright Notice in lua.h 5 */ 6 7 #define lstrlib_c 8 #define LUA_LIB 9 10 #include "lprefix.h" 11 12 13 #include <ctype.h> 14 #include <float.h> 15 #include <limits.h> 16 #include <locale.h> 17 #include <math.h> 18 #include <stddef.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "lua.h" 24 25 #include "lauxlib.h" 26 #include "lualib.h" 27 28 29 /* 30 ** maximum number of captures that a pattern can do during 31 ** pattern-matching. This limit is arbitrary, but must fit in 32 ** an unsigned char. 33 */ 34 #if !defined(LUA_MAXCAPTURES) 35 #define LUA_MAXCAPTURES 32 36 #endif 37 38 39 /* macro to 'unsign' a character */ 40 #define uchar(c) ((unsigned char)(c)) 41 42 43 /* 44 ** Some sizes are better limited to fit in 'int', but must also fit in 45 ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) 46 */ 47 #define MAX_SIZET ((size_t)(~(size_t)0)) 48 49 #define MAXSIZE \ 50 (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) 51 52 53 54 55 static int str_len (lua_State *L) { 56 size_t l; 57 luaL_checklstring(L, 1, &l); 58 lua_pushinteger(L, (lua_Integer)l); 59 return 1; 60 } 61 62 63 /* 64 ** translate a relative initial string position 65 ** (negative means back from end): clip result to [1, inf). 66 ** The length of any string in Lua must fit in a lua_Integer, 67 ** so there are no overflows in the casts. 68 ** The inverted comparison avoids a possible overflow 69 ** computing '-pos'. 70 */ 71 static size_t posrelatI (lua_Integer pos, size_t len) { 72 if (pos > 0) 73 return (size_t)pos; 74 else if (pos == 0) 75 return 1; 76 else if (pos < -(lua_Integer)len) /* inverted comparison */ 77 return 1; /* clip to 1 */ 78 else return len + (size_t)pos + 1; 79 } 80 81 82 /* 83 ** Gets an optional ending string position from argument 'arg', 84 ** with default value 'def'. 
85 ** Negative means back from end: clip result to [0, len] 86 */ 87 static size_t getendpos (lua_State *L, int arg, lua_Integer def, 88 size_t len) { 89 lua_Integer pos = luaL_optinteger(L, arg, def); 90 if (pos > (lua_Integer)len) 91 return len; 92 else if (pos >= 0) 93 return (size_t)pos; 94 else if (pos < -(lua_Integer)len) 95 return 0; 96 else return len + (size_t)pos + 1; 97 } 98 99 100 static int str_sub (lua_State *L) { 101 size_t l; 102 const char *s = luaL_checklstring(L, 1, &l); 103 size_t start = posrelatI(luaL_checkinteger(L, 2), l); 104 size_t end = getendpos(L, 3, -1, l); 105 if (start <= end) 106 lua_pushlstring(L, s + start - 1, (end - start) + 1); 107 else lua_pushliteral(L, ""); 108 return 1; 109 } 110 111 112 static int str_reverse (lua_State *L) { 113 size_t l, i; 114 luaL_Buffer b; 115 const char *s = luaL_checklstring(L, 1, &l); 116 char *p = luaL_buffinitsize(L, &b, l); 117 for (i = 0; i < l; i++) 118 p[i] = s[l - i - 1]; 119 luaL_pushresultsize(&b, l); 120 return 1; 121 } 122 123 124 static int str_lower (lua_State *L) { 125 size_t l; 126 size_t i; 127 luaL_Buffer b; 128 const char *s = luaL_checklstring(L, 1, &l); 129 char *p = luaL_buffinitsize(L, &b, l); 130 for (i=0; i<l; i++) 131 p[i] = tolower(uchar(s[i])); 132 luaL_pushresultsize(&b, l); 133 return 1; 134 } 135 136 137 static int str_upper (lua_State *L) { 138 size_t l; 139 size_t i; 140 luaL_Buffer b; 141 const char *s = luaL_checklstring(L, 1, &l); 142 char *p = luaL_buffinitsize(L, &b, l); 143 for (i=0; i<l; i++) 144 p[i] = toupper(uchar(s[i])); 145 luaL_pushresultsize(&b, l); 146 return 1; 147 } 148 149 150 static int str_rep (lua_State *L) { 151 size_t l, lsep; 152 const char *s = luaL_checklstring(L, 1, &l); 153 lua_Integer n = luaL_checkinteger(L, 2); 154 const char *sep = luaL_optlstring(L, 3, "", &lsep); 155 if (n <= 0) lua_pushliteral(L, ""); 156 else if (l + lsep < l || l + lsep > MAXSIZE / n) /* may overflow? 
*/ 157 return luaL_error(L, "resulting string too large"); 158 else { 159 size_t totallen = (size_t)n * l + (size_t)(n - 1) * lsep; 160 luaL_Buffer b; 161 char *p = luaL_buffinitsize(L, &b, totallen); 162 while (n-- > 1) { /* first n-1 copies (followed by separator) */ 163 memcpy(p, s, l * sizeof(char)); p += l; 164 if (lsep > 0) { /* empty 'memcpy' is not that cheap */ 165 memcpy(p, sep, lsep * sizeof(char)); 166 p += lsep; 167 } 168 } 169 memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 170 luaL_pushresultsize(&b, totallen); 171 } 172 return 1; 173 } 174 175 176 static int str_byte (lua_State *L) { 177 size_t l; 178 const char *s = luaL_checklstring(L, 1, &l); 179 lua_Integer pi = luaL_optinteger(L, 2, 1); 180 size_t posi = posrelatI(pi, l); 181 size_t pose = getendpos(L, 3, pi, l); 182 int n, i; 183 if (posi > pose) return 0; /* empty interval; return no values */ 184 if (pose - posi >= (size_t)INT_MAX) /* arithmetic overflow? */ 185 return luaL_error(L, "string slice too long"); 186 n = (int)(pose - posi) + 1; 187 luaL_checkstack(L, n, "string slice too long"); 188 for (i=0; i<n; i++) 189 lua_pushinteger(L, uchar(s[posi+i-1])); 190 return n; 191 } 192 193 194 static int str_char (lua_State *L) { 195 int n = lua_gettop(L); /* number of arguments */ 196 int i; 197 luaL_Buffer b; 198 char *p = luaL_buffinitsize(L, &b, n); 199 for (i=1; i<=n; i++) { 200 lua_Unsigned c = (lua_Unsigned)luaL_checkinteger(L, i); 201 luaL_argcheck(L, c <= (lua_Unsigned)UCHAR_MAX, i, "value out of range"); 202 p[i - 1] = uchar(c); 203 } 204 luaL_pushresultsize(&b, n); 205 return 1; 206 } 207 208 209 /* 210 ** Buffer to store the result of 'string.dump'. It must be initialized 211 ** after the call to 'lua_dump', to ensure that the function is on the 212 ** top of the stack when 'lua_dump' is called. ('luaL_buffinit' might 213 ** push stuff.) 
214 */ 215 struct str_Writer { 216 int init; /* true iff buffer has been initialized */ 217 luaL_Buffer B; 218 }; 219 220 221 static int writer (lua_State *L, const void *b, size_t size, void *ud) { 222 struct str_Writer *state = (struct str_Writer *)ud; 223 if (!state->init) { 224 state->init = 1; 225 luaL_buffinit(L, &state->B); 226 } 227 luaL_addlstring(&state->B, (const char *)b, size); 228 return 0; 229 } 230 231 232 static int str_dump (lua_State *L) { 233 struct str_Writer state; 234 int strip = lua_toboolean(L, 2); 235 luaL_checktype(L, 1, LUA_TFUNCTION); 236 lua_settop(L, 1); /* ensure function is on the top of the stack */ 237 state.init = 0; 238 if (lua_dump(L, writer, &state, strip) != 0) 239 return luaL_error(L, "unable to dump given function"); 240 luaL_pushresult(&state.B); 241 return 1; 242 } 243 244 245 246 /* 247 ** {====================================================== 248 ** METAMETHODS 249 ** ======================================================= 250 */ 251 252 #if defined(LUA_NOCVTS2N) /* { */ 253 254 /* no coercion from strings to numbers */ 255 256 static const luaL_Reg stringmetamethods[] = { 257 {"__index", NULL}, /* placeholder */ 258 {NULL, NULL} 259 }; 260 261 #else /* }{ */ 262 263 static int tonum (lua_State *L, int arg) { 264 if (lua_type(L, arg) == LUA_TNUMBER) { /* already a number? 
*/ 265 lua_pushvalue(L, arg); 266 return 1; 267 } 268 else { /* check whether it is a numerical string */ 269 size_t len; 270 const char *s = lua_tolstring(L, arg, &len); 271 return (s != NULL && lua_stringtonumber(L, s) == len + 1); 272 } 273 } 274 275 276 static void trymt (lua_State *L, const char *mtname) { 277 lua_settop(L, 2); /* back to the original arguments */ 278 if (lua_type(L, 2) == LUA_TSTRING || !luaL_getmetafield(L, 2, mtname)) 279 luaL_error(L, "attempt to %s a '%s' with a '%s'", mtname + 2, 280 luaL_typename(L, -2), luaL_typename(L, -1)); 281 lua_insert(L, -3); /* put metamethod before arguments */ 282 lua_call(L, 2, 1); /* call metamethod */ 283 } 284 285 286 static int arith (lua_State *L, int op, const char *mtname) { 287 if (tonum(L, 1) && tonum(L, 2)) 288 lua_arith(L, op); /* result will be on the top */ 289 else 290 trymt(L, mtname); 291 return 1; 292 } 293 294 295 static int arith_add (lua_State *L) { 296 return arith(L, LUA_OPADD, "__add"); 297 } 298 299 static int arith_sub (lua_State *L) { 300 return arith(L, LUA_OPSUB, "__sub"); 301 } 302 303 static int arith_mul (lua_State *L) { 304 return arith(L, LUA_OPMUL, "__mul"); 305 } 306 307 static int arith_mod (lua_State *L) { 308 return arith(L, LUA_OPMOD, "__mod"); 309 } 310 311 static int arith_pow (lua_State *L) { 312 return arith(L, LUA_OPPOW, "__pow"); 313 } 314 315 static int arith_div (lua_State *L) { 316 return arith(L, LUA_OPDIV, "__div"); 317 } 318 319 static int arith_idiv (lua_State *L) { 320 return arith(L, LUA_OPIDIV, "__idiv"); 321 } 322 323 static int arith_unm (lua_State *L) { 324 return arith(L, LUA_OPUNM, "__unm"); 325 } 326 327 328 static const luaL_Reg stringmetamethods[] = { 329 {"__add", arith_add}, 330 {"__sub", arith_sub}, 331 {"__mul", arith_mul}, 332 {"__mod", arith_mod}, 333 {"__pow", arith_pow}, 334 {"__div", arith_div}, 335 {"__idiv", arith_idiv}, 336 {"__unm", arith_unm}, 337 {"__index", NULL}, /* placeholder */ 338 {NULL, NULL} 339 }; 340 341 #endif /* } */ 342 
343 /* }====================================================== */ 344 345 /* 346 ** {====================================================== 347 ** PATTERN MATCHING 348 ** ======================================================= 349 */ 350 351 352 #define CAP_UNFINISHED (-1) 353 #define CAP_POSITION (-2) 354 355 356 typedef struct MatchState { 357 const char *src_init; /* init of source string */ 358 const char *src_end; /* end ('\0') of source string */ 359 const char *p_end; /* end ('\0') of pattern */ 360 lua_State *L; 361 int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 362 unsigned char level; /* total number of captures (finished or unfinished) */ 363 struct { 364 const char *init; 365 ptrdiff_t len; 366 } capture[LUA_MAXCAPTURES]; 367 } MatchState; 368 369 370 /* recursive function */ 371 static const char *match (MatchState *ms, const char *s, const char *p); 372 373 374 /* maximum recursion depth for 'match' */ 375 #if !defined(MAXCCALLS) 376 #define MAXCCALLS 200 377 #endif 378 379 380 #define L_ESC '%' 381 #define SPECIALS "^$*+?.([%-" 382 383 384 static int check_capture (MatchState *ms, int l) { 385 l -= '1'; 386 if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) 387 return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 388 return l; 389 } 390 391 392 static int capture_to_close (MatchState *ms) { 393 int level = ms->level; 394 for (level--; level>=0; level--) 395 if (ms->capture[level].len == CAP_UNFINISHED) return level; 396 return luaL_error(ms->L, "invalid pattern capture"); 397 } 398 399 400 static const char *classend (MatchState *ms, const char *p) { 401 switch (*p++) { 402 case L_ESC: { 403 if (p == ms->p_end) 404 luaL_error(ms->L, "malformed pattern (ends with '%%')"); 405 return p+1; 406 } 407 case '[': { 408 if (*p == '^') p++; 409 do { /* look for a ']' */ 410 if (p == ms->p_end) 411 luaL_error(ms->L, "malformed pattern (missing ']')"); 412 if (*(p++) == L_ESC && p < ms->p_end) 413 p++; 
/* skip escapes (e.g. '%]') */ 414 } while (*p != ']'); 415 return p+1; 416 } 417 default: { 418 return p; 419 } 420 } 421 } 422 423 424 static int match_class (int c, int cl) { 425 int res; 426 switch (tolower(cl)) { 427 case 'a' : res = isalpha(c); break; 428 case 'c' : res = iscntrl(c); break; 429 case 'd' : res = isdigit(c); break; 430 case 'g' : res = isgraph(c); break; 431 case 'l' : res = islower(c); break; 432 case 'p' : res = ispunct(c); break; 433 case 's' : res = isspace(c); break; 434 case 'u' : res = isupper(c); break; 435 case 'w' : res = isalnum(c); break; 436 case 'x' : res = isxdigit(c); break; 437 case 'z' : res = (c == 0); break; /* deprecated option */ 438 default: return (cl == c); 439 } 440 return (islower(cl) ? res : !res); 441 } 442 443 444 static int matchbracketclass (int c, const char *p, const char *ec) { 445 int sig = 1; 446 if (*(p+1) == '^') { 447 sig = 0; 448 p++; /* skip the '^' */ 449 } 450 while (++p < ec) { 451 if (*p == L_ESC) { 452 p++; 453 if (match_class(c, uchar(*p))) 454 return sig; 455 } 456 else if ((*(p+1) == '-') && (p+2 < ec)) { 457 p+=2; 458 if (uchar(*(p-2)) <= c && c <= uchar(*p)) 459 return sig; 460 } 461 else if (uchar(*p) == c) return sig; 462 } 463 return !sig; 464 } 465 466 467 static int singlematch (MatchState *ms, const char *s, const char *p, 468 const char *ep) { 469 if (s >= ms->src_end) 470 return 0; 471 else { 472 int c = uchar(*s); 473 switch (*p) { 474 case '.': return 1; /* matches any char */ 475 case L_ESC: return match_class(c, uchar(*(p+1))); 476 case '[': return matchbracketclass(c, p, ep-1); 477 default: return (uchar(*p) == c); 478 } 479 } 480 } 481 482 483 static const char *matchbalance (MatchState *ms, const char *s, 484 const char *p) { 485 if (p >= ms->p_end - 1) 486 luaL_error(ms->L, "malformed pattern (missing arguments to '%%b')"); 487 if (*s != *p) return NULL; 488 else { 489 int b = *p; 490 int e = *(p+1); 491 int cont = 1; 492 while (++s < ms->src_end) { 493 if (*s == e) { 494 if 
(--cont == 0) return s+1; 495 } 496 else if (*s == b) cont++; 497 } 498 } 499 return NULL; /* string ends out of balance */ 500 } 501 502 503 static const char *max_expand (MatchState *ms, const char *s, 504 const char *p, const char *ep) { 505 ptrdiff_t i = 0; /* counts maximum expand for item */ 506 while (singlematch(ms, s + i, p, ep)) 507 i++; 508 /* keeps trying to match with the maximum repetitions */ 509 while (i>=0) { 510 const char *res = match(ms, (s+i), ep+1); 511 if (res) return res; 512 i--; /* else didn't match; reduce 1 repetition to try again */ 513 } 514 return NULL; 515 } 516 517 518 static const char *min_expand (MatchState *ms, const char *s, 519 const char *p, const char *ep) { 520 for (;;) { 521 const char *res = match(ms, s, ep+1); 522 if (res != NULL) 523 return res; 524 else if (singlematch(ms, s, p, ep)) 525 s++; /* try with one more repetition */ 526 else return NULL; 527 } 528 } 529 530 531 static const char *start_capture (MatchState *ms, const char *s, 532 const char *p, int what) { 533 const char *res; 534 int level = ms->level; 535 if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 536 ms->capture[level].init = s; 537 ms->capture[level].len = what; 538 ms->level = level+1; 539 if ((res=match(ms, s, p)) == NULL) /* match failed? */ 540 ms->level--; /* undo capture */ 541 return res; 542 } 543 544 545 static const char *end_capture (MatchState *ms, const char *s, 546 const char *p) { 547 int l = capture_to_close(ms); 548 const char *res; 549 ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 550 if ((res = match(ms, s, p)) == NULL) /* match failed? 
*/ 551 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 552 return res; 553 } 554 555 556 static const char *match_capture (MatchState *ms, const char *s, int l) { 557 size_t len; 558 l = check_capture(ms, l); 559 len = ms->capture[l].len; 560 if ((size_t)(ms->src_end-s) >= len && 561 memcmp(ms->capture[l].init, s, len) == 0) 562 return s+len; 563 else return NULL; 564 } 565 566 567 static const char *match (MatchState *ms, const char *s, const char *p) { 568 if (ms->matchdepth-- == 0) 569 luaL_error(ms->L, "pattern too complex"); 570 init: /* using goto's to optimize tail recursion */ 571 if (p != ms->p_end) { /* end of pattern? */ 572 switch (*p) { 573 case '(': { /* start capture */ 574 if (*(p + 1) == ')') /* position capture? */ 575 s = start_capture(ms, s, p + 2, CAP_POSITION); 576 else 577 s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 578 break; 579 } 580 case ')': { /* end capture */ 581 s = end_capture(ms, s, p + 1); 582 break; 583 } 584 case '$': { 585 if ((p + 1) != ms->p_end) /* is the '$' the last char in pattern? */ 586 goto dflt; /* no; go to default */ 587 s = (s == ms->src_end) ? s : NULL; /* check end of string */ 588 break; 589 } 590 case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 591 switch (*(p + 1)) { 592 case 'b': { /* balanced string? */ 593 s = matchbalance(ms, s, p + 2); 594 if (s != NULL) { 595 p += 4; goto init; /* return match(ms, s, p + 4); */ 596 } /* else fail (s == NULL) */ 597 break; 598 } 599 case 'f': { /* frontier? */ 600 const char *ep; char previous; 601 p += 2; 602 if (*p != '[') 603 luaL_error(ms->L, "missing '[' after '%%f' in pattern"); 604 ep = classend(ms, p); /* points to what is next */ 605 previous = (s == ms->src_init) ? 
'\0' : *(s - 1); 606 if (!matchbracketclass(uchar(previous), p, ep - 1) && 607 matchbracketclass(uchar(*s), p, ep - 1)) { 608 p = ep; goto init; /* return match(ms, s, ep); */ 609 } 610 s = NULL; /* match failed */ 611 break; 612 } 613 case '0': case '1': case '2': case '3': 614 case '4': case '5': case '6': case '7': 615 case '8': case '9': { /* capture results (%0-%9)? */ 616 s = match_capture(ms, s, uchar(*(p + 1))); 617 if (s != NULL) { 618 p += 2; goto init; /* return match(ms, s, p + 2) */ 619 } 620 break; 621 } 622 default: goto dflt; 623 } 624 break; 625 } 626 default: dflt: { /* pattern class plus optional suffix */ 627 const char *ep = classend(ms, p); /* points to optional suffix */ 628 /* does not match at least once? */ 629 if (!singlematch(ms, s, p, ep)) { 630 if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 631 p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 632 } 633 else /* '+' or no suffix */ 634 s = NULL; /* fail */ 635 } 636 else { /* matched once */ 637 switch (*ep) { /* handle optional suffix */ 638 case '?': { /* optional */ 639 const char *res; 640 if ((res = match(ms, s + 1, ep + 1)) != NULL) 641 s = res; 642 else { 643 p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 644 } 645 break; 646 } 647 case '+': /* 1 or more repetitions */ 648 s++; /* 1 match already done */ 649 /* FALLTHROUGH */ 650 case '*': /* 0 or more repetitions */ 651 s = max_expand(ms, s, p, ep); 652 break; 653 case '-': /* 0 or more repetitions (minimum) */ 654 s = min_expand(ms, s, p, ep); 655 break; 656 default: /* no suffix */ 657 s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 658 } 659 } 660 break; 661 } 662 } 663 } 664 ms->matchdepth++; 665 return s; 666 } 667 668 669 670 static const char *lmemfind (const char *s1, size_t l1, 671 const char *s2, size_t l2) { 672 if (l2 == 0) return s1; /* empty strings are everywhere */ 673 else if (l2 > l1) return NULL; /* avoids a negative 'l1' */ 674 else { 675 const char *init; 
/* to search for a '*s2' inside 's1' */ 676 l2--; /* 1st char will be checked by 'memchr' */ 677 l1 = l1-l2; /* 's2' cannot be found after that */ 678 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 679 init++; /* 1st char is already checked */ 680 if (memcmp(init, s2+1, l2) == 0) 681 return init-1; 682 else { /* correct 'l1' and 's1' to try again */ 683 l1 -= init-s1; 684 s1 = init; 685 } 686 } 687 return NULL; /* not found */ 688 } 689 } 690 691 692 /* 693 ** get information about the i-th capture. If there are no captures 694 ** and 'i==0', return information about the whole match, which 695 ** is the range 's'..'e'. If the capture is a string, return 696 ** its length and put its address in '*cap'. If it is an integer 697 ** (a position), push it on the stack and return CAP_POSITION. 698 */ 699 static size_t get_onecapture (MatchState *ms, int i, const char *s, 700 const char *e, const char **cap) { 701 if (i >= ms->level) { 702 if (i != 0) 703 luaL_error(ms->L, "invalid capture index %%%d", i + 1); 704 *cap = s; 705 return e - s; 706 } 707 else { 708 ptrdiff_t capl = ms->capture[i].len; 709 *cap = ms->capture[i].init; 710 if (capl == CAP_UNFINISHED) 711 luaL_error(ms->L, "unfinished capture"); 712 else if (capl == CAP_POSITION) 713 lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1); 714 return capl; 715 } 716 } 717 718 719 /* 720 ** Push the i-th capture on the stack. 721 */ 722 static void push_onecapture (MatchState *ms, int i, const char *s, 723 const char *e) { 724 const char *cap; 725 ptrdiff_t l = get_onecapture(ms, i, s, e, &cap); 726 if (l != CAP_POSITION) 727 lua_pushlstring(ms->L, cap, l); 728 /* else position was already pushed */ 729 } 730 731 732 static int push_captures (MatchState *ms, const char *s, const char *e) { 733 int i; 734 int nlevels = (ms->level == 0 && s) ? 
1 : ms->level; 735 luaL_checkstack(ms->L, nlevels, "too many captures"); 736 for (i = 0; i < nlevels; i++) 737 push_onecapture(ms, i, s, e); 738 return nlevels; /* number of strings pushed */ 739 } 740 741 742 /* check whether pattern has no special characters */ 743 static int nospecials (const char *p, size_t l) { 744 size_t upto = 0; 745 do { 746 if (strpbrk(p + upto, SPECIALS)) 747 return 0; /* pattern has a special character */ 748 upto += strlen(p + upto) + 1; /* may have more after \0 */ 749 } while (upto <= l); 750 return 1; /* no special chars found */ 751 } 752 753 754 static void prepstate (MatchState *ms, lua_State *L, 755 const char *s, size_t ls, const char *p, size_t lp) { 756 ms->L = L; 757 ms->matchdepth = MAXCCALLS; 758 ms->src_init = s; 759 ms->src_end = s + ls; 760 ms->p_end = p + lp; 761 } 762 763 764 static void reprepstate (MatchState *ms) { 765 ms->level = 0; 766 lua_assert(ms->matchdepth == MAXCCALLS); 767 } 768 769 770 static int str_find_aux (lua_State *L, int find) { 771 size_t ls, lp; 772 const char *s = luaL_checklstring(L, 1, &ls); 773 const char *p = luaL_checklstring(L, 2, &lp); 774 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; 775 if (init > ls) { /* start after string's end? */ 776 luaL_pushfail(L); /* cannot find anything */ 777 return 1; 778 } 779 /* explicit request or no special characters? 
*/ 780 if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 781 /* do a plain search */ 782 const char *s2 = lmemfind(s + init, ls - init, p, lp); 783 if (s2) { 784 lua_pushinteger(L, (s2 - s) + 1); 785 lua_pushinteger(L, (s2 - s) + lp); 786 return 2; 787 } 788 } 789 else { 790 MatchState ms; 791 const char *s1 = s + init; 792 int anchor = (*p == '^'); 793 if (anchor) { 794 p++; lp--; /* skip anchor character */ 795 } 796 prepstate(&ms, L, s, ls, p, lp); 797 do { 798 const char *res; 799 reprepstate(&ms); 800 if ((res=match(&ms, s1, p)) != NULL) { 801 if (find) { 802 lua_pushinteger(L, (s1 - s) + 1); /* start */ 803 lua_pushinteger(L, res - s); /* end */ 804 return push_captures(&ms, NULL, 0) + 2; 805 } 806 else 807 return push_captures(&ms, s1, res); 808 } 809 } while (s1++ < ms.src_end && !anchor); 810 } 811 luaL_pushfail(L); /* not found */ 812 return 1; 813 } 814 815 816 static int str_find (lua_State *L) { 817 return str_find_aux(L, 1); 818 } 819 820 821 static int str_match (lua_State *L) { 822 return str_find_aux(L, 0); 823 } 824 825 826 /* state for 'gmatch' */ 827 typedef struct GMatchState { 828 const char *src; /* current position */ 829 const char *p; /* pattern */ 830 const char *lastmatch; /* end of last match */ 831 MatchState ms; /* match state */ 832 } GMatchState; 833 834 835 static int gmatch_aux (lua_State *L) { 836 GMatchState *gm = (GMatchState *)lua_touserdata(L, lua_upvalueindex(3)); 837 const char *src; 838 gm->ms.L = L; 839 for (src = gm->src; src <= gm->ms.src_end; src++) { 840 const char *e; 841 reprepstate(&gm->ms); 842 if ((e = match(&gm->ms, src, gm->p)) != NULL && e != gm->lastmatch) { 843 gm->src = gm->lastmatch = e; 844 return push_captures(&gm->ms, src, e); 845 } 846 } 847 return 0; /* not found */ 848 } 849 850 851 static int gmatch (lua_State *L) { 852 size_t ls, lp; 853 const char *s = luaL_checklstring(L, 1, &ls); 854 const char *p = luaL_checklstring(L, 2, &lp); 855 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 
1; 856 GMatchState *gm; 857 lua_settop(L, 2); /* keep strings on closure to avoid being collected */ 858 gm = (GMatchState *)lua_newuserdatauv(L, sizeof(GMatchState), 0); 859 if (init > ls) /* start after string's end? */ 860 init = ls + 1; /* avoid overflows in 's + init' */ 861 prepstate(&gm->ms, L, s, ls, p, lp); 862 gm->src = s + init; gm->p = p; gm->lastmatch = NULL; 863 lua_pushcclosure(L, gmatch_aux, 3); 864 return 1; 865 } 866 867 868 static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 869 const char *e) { 870 size_t l; 871 lua_State *L = ms->L; 872 const char *news = lua_tolstring(L, 3, &l); 873 const char *p; 874 while ((p = (char *)memchr(news, L_ESC, l)) != NULL) { 875 luaL_addlstring(b, news, p - news); 876 p++; /* skip ESC */ 877 if (*p == L_ESC) /* '%%' */ 878 luaL_addchar(b, *p); 879 else if (*p == '0') /* '%0' */ 880 luaL_addlstring(b, s, e - s); 881 else if (isdigit(uchar(*p))) { /* '%n' */ 882 const char *cap; 883 ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap); 884 if (resl == CAP_POSITION) 885 luaL_addvalue(b); /* add position to accumulated result */ 886 else 887 luaL_addlstring(b, cap, resl); 888 } 889 else 890 luaL_error(L, "invalid use of '%c' in replacement string", L_ESC); 891 l -= p + 1 - news; 892 news = p + 1; 893 } 894 luaL_addlstring(b, news, l); 895 } 896 897 898 /* 899 ** Add the replacement value to the string buffer 'b'. 900 ** Return true if the original string was changed. (Function calls and 901 ** table indexing resulting in nil or false do not change the subject.) 
902 */ 903 static int add_value (MatchState *ms, luaL_Buffer *b, const char *s, 904 const char *e, int tr) { 905 lua_State *L = ms->L; 906 switch (tr) { 907 case LUA_TFUNCTION: { /* call the function */ 908 int n; 909 lua_pushvalue(L, 3); /* push the function */ 910 n = push_captures(ms, s, e); /* all captures as arguments */ 911 lua_call(L, n, 1); /* call it */ 912 break; 913 } 914 case LUA_TTABLE: { /* index the table */ 915 push_onecapture(ms, 0, s, e); /* first capture is the index */ 916 lua_gettable(L, 3); 917 break; 918 } 919 default: { /* LUA_TNUMBER or LUA_TSTRING */ 920 add_s(ms, b, s, e); /* add value to the buffer */ 921 return 1; /* something changed */ 922 } 923 } 924 if (!lua_toboolean(L, -1)) { /* nil or false? */ 925 lua_pop(L, 1); /* remove value */ 926 luaL_addlstring(b, s, e - s); /* keep original text */ 927 return 0; /* no changes */ 928 } 929 else if (!lua_isstring(L, -1)) 930 return luaL_error(L, "invalid replacement value (a %s)", 931 luaL_typename(L, -1)); 932 else { 933 luaL_addvalue(b); /* add result to accumulator */ 934 return 1; /* something changed */ 935 } 936 } 937 938 939 static int str_gsub (lua_State *L) { 940 size_t srcl, lp; 941 const char *src = luaL_checklstring(L, 1, &srcl); /* subject */ 942 const char *p = luaL_checklstring(L, 2, &lp); /* pattern */ 943 const char *lastmatch = NULL; /* end of last match */ 944 int tr = lua_type(L, 3); /* replacement type */ 945 lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */ 946 int anchor = (*p == '^'); 947 lua_Integer n = 0; /* replacement count */ 948 int changed = 0; /* change flag */ 949 MatchState ms; 950 luaL_Buffer b; 951 luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 952 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 953 "string/function/table"); 954 luaL_buffinit(L, &b); 955 if (anchor) { 956 p++; lp--; /* skip anchor character */ 957 } 958 prepstate(&ms, L, src, srcl, p, lp); 959 while (n < max_s) { 960 const char *e; 961 
reprepstate(&ms); /* (re)prepare state for new match */ 962 if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */ 963 n++; 964 changed = add_value(&ms, &b, src, e, tr) | changed; 965 src = lastmatch = e; 966 } 967 else if (src < ms.src_end) /* otherwise, skip one character */ 968 luaL_addchar(&b, *src++); 969 else break; /* end of subject */ 970 if (anchor) break; 971 } 972 if (!changed) /* no changes? */ 973 lua_pushvalue(L, 1); /* return original string */ 974 else { /* something changed */ 975 luaL_addlstring(&b, src, ms.src_end-src); 976 luaL_pushresult(&b); /* create and return new string */ 977 } 978 lua_pushinteger(L, n); /* number of substitutions */ 979 return 2; 980 } 981 982 /* }====================================================== */ 983 984 985 986 /* 987 ** {====================================================== 988 ** STRING FORMAT 989 ** ======================================================= 990 */ 991 992 #if !defined(lua_number2strx) /* { */ 993 994 /* 995 ** Hexadecimal floating-point formatter 996 */ 997 998 #define SIZELENMOD (sizeof(LUA_NUMBER_FRMLEN)/sizeof(char)) 999 1000 1001 /* 1002 ** Number of bits that goes into the first digit. It can be any value 1003 ** between 1 and 4; the following definition tries to align the number 1004 ** to nibble boundaries by making what is left after that first digit a 1005 ** multiple of 4. 1006 */ 1007 #define L_NBFD ((l_floatatt(MANT_DIG) - 1)%4 + 1) 1008 1009 1010 /* 1011 ** Add integer part of 'x' to buffer and return new 'x' 1012 */ 1013 static lua_Number adddigit (char *buff, int n, lua_Number x) { 1014 lua_Number dd = l_mathop(floor)(x); /* get integer part from 'x' */ 1015 int d = (int)dd; 1016 buff[n] = (d < 10 ? 
d + '0' : d - 10 + 'a'); /* add to buffer */ 1017 return x - dd; /* return what is left */ 1018 } 1019 1020 1021 static int num2straux (char *buff, int sz, lua_Number x) { 1022 /* if 'inf' or 'NaN', format it like '%g' */ 1023 if (x != x || x == (lua_Number)HUGE_VAL || x == -(lua_Number)HUGE_VAL) 1024 return l_sprintf(buff, sz, LUA_NUMBER_FMT, (LUAI_UACNUMBER)x); 1025 else if (x == 0) { /* can be -0... */ 1026 /* create "0" or "-0" followed by exponent */ 1027 return l_sprintf(buff, sz, LUA_NUMBER_FMT "x0p+0", (LUAI_UACNUMBER)x); 1028 } 1029 else { 1030 int e; 1031 lua_Number m = l_mathop(frexp)(x, &e); /* 'x' fraction and exponent */ 1032 int n = 0; /* character count */ 1033 if (m < 0) { /* is number negative? */ 1034 buff[n++] = '-'; /* add sign */ 1035 m = -m; /* make it positive */ 1036 } 1037 buff[n++] = '0'; buff[n++] = 'x'; /* add "0x" */ 1038 m = adddigit(buff, n++, m * (1 << L_NBFD)); /* add first digit */ 1039 e -= L_NBFD; /* this digit goes before the radix point */ 1040 if (m > 0) { /* more digits? */ 1041 buff[n++] = lua_getlocaledecpoint(); /* add radix point */ 1042 do { /* add as many digits as needed */ 1043 m = adddigit(buff, n++, m * 16); 1044 } while (m > 0); 1045 } 1046 n += l_sprintf(buff + n, sz - n, "p%+d", e); /* add exponent */ 1047 lua_assert(n < sz); 1048 return n; 1049 } 1050 } 1051 1052 1053 static int lua_number2strx (lua_State *L, char *buff, int sz, 1054 const char *fmt, lua_Number x) { 1055 int n = num2straux(buff, sz, x); 1056 if (fmt[SIZELENMOD] == 'A') { 1057 int i; 1058 for (i = 0; i < n; i++) 1059 buff[i] = toupper(uchar(buff[i])); 1060 } 1061 else if (fmt[SIZELENMOD] != 'a') 1062 return luaL_error(L, "modifiers for format '%%a'/'%%A' not implemented"); 1063 return n; 1064 } 1065 1066 #endif /* } */ 1067 1068 1069 /* 1070 ** Maximum size for items formatted with '%f'. 
This size is produced 1071 ** by format('%.99f', -maxfloat), and is equal to 99 + 3 ('-', '.', 1072 ** and '\0') + number of decimal digits to represent maxfloat (which 1073 ** is maximum exponent + 1). (99+3+1, adding some extra, 110) 1074 */ 1075 #define MAX_ITEMF (110 + l_floatatt(MAX_10_EXP)) 1076 1077 1078 /* 1079 ** All formats except '%f' do not need that large limit. The other 1080 ** float formats use exponents, so that they fit in the 99 limit for 1081 ** significant digits; 's' for large strings and 'q' add items directly 1082 ** to the buffer; all integer formats also fit in the 99 limit. The 1083 ** worst case are floats: they may need 99 significant digits, plus 1084 ** '0x', '-', '.', 'e+XXXX', and '\0'. Adding some extra, 120. 1085 */ 1086 #define MAX_ITEM 120 1087 1088 1089 /* valid flags in a format specification */ 1090 #if !defined(L_FMTFLAGS) 1091 #define L_FMTFLAGS "-+ #0" 1092 #endif 1093 1094 1095 /* 1096 ** maximum size of each format specification (such as "%-099.99d") 1097 */ 1098 #define MAX_FORMAT 32 1099 1100 1101 static void addquoted (luaL_Buffer *b, const char *s, size_t len) { 1102 luaL_addchar(b, '"'); 1103 while (len--) { 1104 if (*s == '"' || *s == '\\' || *s == '\n') { 1105 luaL_addchar(b, '\\'); 1106 luaL_addchar(b, *s); 1107 } 1108 else if (iscntrl(uchar(*s))) { 1109 char buff[10]; 1110 if (!isdigit(uchar(*(s+1)))) 1111 l_sprintf(buff, sizeof(buff), "\\%d", (int)uchar(*s)); 1112 else 1113 l_sprintf(buff, sizeof(buff), "\\%03d", (int)uchar(*s)); 1114 luaL_addstring(b, buff); 1115 } 1116 else 1117 luaL_addchar(b, *s); 1118 s++; 1119 } 1120 luaL_addchar(b, '"'); 1121 } 1122 1123 1124 #if LUA_FLOAT_TYPE != LUA_FLOAT_INT64 1125 /* 1126 ** Serialize a floating-point number in such a way that it can be 1127 ** scanned back by Lua. Use hexadecimal format for "common" numbers 1128 ** (to preserve precision); inf, -inf, and NaN are handled separately. 1129 ** (NaN cannot be expressed as a numeral, so we write '(0/0)' for it.) 
** The text is written into 'buff' (the formatting calls are given
** MAX_ITEM as its size) and the value returned by the formatting call
** is returned as the length of the result.
*/
static int quotefloat (lua_State *L, char *buff, lua_Number n) {
  const char *s;  /* for the fixed representations */
  if (n == (lua_Number)HUGE_VAL)  /* inf? */
    s = "1e9999";
  else if (n == -(lua_Number)HUGE_VAL)  /* -inf? */
    s = "-1e9999";
  else if (n != n)  /* NaN? */
    s = "(0/0)";
  else {  /* format number as hexadecimal */
    int  nb = lua_number2strx(L, buff, MAX_ITEM,
                                 "%" LUA_NUMBER_FRMLEN "a", n);
    /* ensures that 'buff' string uses a dot as the radix character */
    if (memchr(buff, '.', nb) == NULL) {  /* no dot? */
      char point = lua_getlocaledecpoint();  /* try locale point */
      char *ppoint = (char *)memchr(buff, point, nb);
      if (ppoint) *ppoint = '.';  /* change it to a dot */
    }
    return nb;
  }
  /* for the fixed representations */
  return l_sprintf(buff, MAX_ITEM, "%s", s);
}
#endif

/*
** Append to buffer 'b' a literal form of the value at stack index
** 'arg'. Strings go through 'addquoted'. Numbers are formatted into
** MAX_ITEM bytes reserved in the buffer: floats through 'quotefloat'
** (only when floats are compiled in), integers with LUA_INTEGER_FMT,
** except LUA_MININTEGER, which is written in hexadecimal. For nil and
** booleans the result of 'luaL_tolstring' is added. Any other type
** raises an argument error.
*/
static void addliteral (lua_State *L, luaL_Buffer *b, int arg) {
  switch (lua_type(L, arg)) {
    case LUA_TSTRING: {
      size_t len;
      const char *s = lua_tolstring(L, arg, &len);
      addquoted(b, s, len);
      break;
    }
    case LUA_TNUMBER: {
      char *buff = luaL_prepbuffsize(b, MAX_ITEM);
      int nb;
#if LUA_FLOAT_TYPE != LUA_FLOAT_INT64
      if (!lua_isinteger(L, arg))  /* float? */
        nb = quotefloat(L, buff, lua_tonumber(L, arg));
      else {  /* integers */
#else
      {  /* no float path: bare block keeps braces balanced with 'else {' */
#endif
        lua_Integer n = lua_tointeger(L, arg);
        const char *format = (n == LUA_MININTEGER)  /* corner case? */
                           ? "0x%" LUA_INTEGER_FRMLEN "x"  /* use hex */
                           : LUA_INTEGER_FMT;  /* else use default format */
        nb = l_sprintf(buff, MAX_ITEM, format, (LUAI_UACINT)n);
      }
      luaL_addsize(b, nb);  /* commit the bytes written into 'buff' */
      break;
    }
    case LUA_TNIL: case LUA_TBOOLEAN: {
      luaL_tolstring(L, arg, NULL);
      luaL_addvalue(b);
      break;
    }
    default: {
      luaL_argerror(L, arg, "value has no literal form");
    }
  }
}


/*
** Scan one format specification starting at 'strfrmt' (the character
** right after the '%'): any run of characters from L_FMTFLAGS, then up
** to two digits of width, then optionally a '.' followed by up to two
** digits of precision. Raises an error if the run of flags is as long
** as 'sizeof(L_FMTFLAGS)' or more, or if a further digit follows.
** Stores in 'form' a '%', the scanned text, and the character that
** follows it (the conversion), terminated by '\0'. Returns a pointer
** to that following character inside the original format string.
*/
static const char *scanformat (lua_State *L, const char *strfrmt, char *form) {
  const char *p = strfrmt;
  while (*p != '\0' && strchr(L_FMTFLAGS, *p) != NULL) p++;  /* skip flags */
  if ((size_t)(p - strfrmt) >= sizeof(L_FMTFLAGS)/sizeof(char))
    luaL_error(L, "invalid format (repeated flags)");
  if (isdigit(uchar(*p))) p++;  /* skip width */
  if (isdigit(uchar(*p))) p++;  /* (2 digits at most) */
  if (*p == '.') {
    p++;
    if (isdigit(uchar(*p))) p++;  /* skip precision */
    if (isdigit(uchar(*p))) p++;  /* (2 digits at most) */
  }
  if (isdigit(uchar(*p)))
    luaL_error(L, "invalid format (width or precision too long)");
  *(form++) = '%';
  /* '+ 1' also copies the character at 'p' (the conversion) */
  memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char));
  form += (p - strfrmt) + 1;
  *form = '\0';
  return p;
}


/*
** add length modifier into formats: 'lenmod' is inserted right before
** the last character of 'form', which is kept as the final character.
** No size check is done here; 'form' must have room for the extra
** 'strlen(lenmod)' characters.
*/
static void addlenmod (char *form, const char *lenmod) {
  size_t l = strlen(form);
  size_t lm = strlen(lenmod);
  char spec = form[l - 1];  /* save last character of the format */
  strcpy(form + l - 1, lenmod);  /* overwrite it with the modifier */
  form[l + lm - 1] = spec;  /* put the saved character back at the end */
  form[l + lm] = '\0';
}


/*
** Format function of the library (registered as "format" in 'strlib').
** Argument 1 is the format string. Characters other than L_ESC are
** copied to the result; a doubled L_ESC yields a single one; any other
** L_ESC starts a format item, which consumes the next argument (error
** "no value" if there is none). The specification is scanned by
** 'scanformat' and the item is formatted into space reserved in the
** buffer: MAX_ITEM bytes, or MAX_ITEMF for '%f'. Options 'q' and, in
** some cases, 's' add their result to the buffer directly and leave
** 'nb' as zero. Returns 1 (the resulting string).
*/
static int str_format (lua_State *L) {
  int top = lua_gettop(L);
  int arg = 1;
  size_t sfl;
  const char *strfrmt = luaL_checklstring(L, arg, &sfl);
  const char *strfrmt_end = strfrmt+sfl;
  luaL_Buffer b;
  luaL_buffinit(L, &b);
  while (strfrmt < strfrmt_end) {
    if (*strfrmt != L_ESC)
      luaL_addchar(&b, *strfrmt++);
    else if (*++strfrmt == L_ESC)
      luaL_addchar(&b, *strfrmt++);  /* %% */
    else { /* format item */
      char form[MAX_FORMAT];  /* to store the format ('%...') */
      int maxitem = MAX_ITEM;
      char *buff = luaL_prepbuffsize(&b, maxitem);  /* to put formatted item */
      int nb = 0;  /* number of bytes in added item */
      if (++arg > top)
        return luaL_argerror(L, arg, "no value");
      strfrmt = scanformat(L, strfrmt, form);
      switch (*strfrmt++) {
        case 'c': {
          nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg));
          break;
        }
        case 'd': case 'i':
        case 'o': case 'u': case 'x': case 'X': {
          lua_Integer n = luaL_checkinteger(L, arg);
          addlenmod(form, LUA_INTEGER_FRMLEN);
          nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n);
          break;
        }
        case 'a': case 'A':
          addlenmod(form, LUA_NUMBER_FRMLEN);
          nb = lua_number2strx(L, buff, maxitem, form,
                                  luaL_checknumber(L, arg));
          break;
#if LUA_FLOAT_TYPE != LUA_FLOAT_INT64
        case 'f':
          maxitem = MAX_ITEMF;  /* extra space for '%f' */
          buff = luaL_prepbuffsize(&b, maxitem);
          /* FALLTHROUGH */
#endif
        case 'e': case 'E': case 'g': case 'G': {
          lua_Number n = luaL_checknumber(L, arg);
          addlenmod(form, LUA_NUMBER_FRMLEN);
          nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n);
          break;
        }
        case 'p': {
          const void *p = lua_topointer(L, arg);
          if (p == NULL) {  /* avoid calling 'printf' with argument NULL */
            p = "(null)";  /* result */
            form[strlen(form) - 1] = 's';  /* format it as a string */
          }
          nb = l_sprintf(buff, maxitem, form, p);
          break;
        }
        case 'q': {
          if (form[2] != '\0')  /* modifiers? */
            return luaL_error(L, "specifier '%%q' cannot have modifiers");
          addliteral(L, &b, arg);  /* adds to the buffer itself; 'nb' stays 0 */
          break;
        }
        case 's': {
          size_t l;
          const char *s = luaL_tolstring(L, arg, &l);
          if (form[2] == '\0')  /* no modifiers? */
            luaL_addvalue(&b);  /* keep entire string */
          else {
            luaL_argcheck(L, l == strlen(s), arg, "string contains zeros");
            if (!strchr(form, '.') && l >= 100) {
              /* no precision and string is too long to be formatted */
              luaL_addvalue(&b);  /* keep entire string */
            }
            else {  /* format the string into 'buff' */
              nb = l_sprintf(buff, maxitem, form, s);
              lua_pop(L, 1);  /* remove result from 'luaL_tolstring' */
            }
          }
          break;
        }
        default: {  /* also treat cases 'nLlh' ('p' has its own case above) */
          return luaL_error(L, "invalid conversion '%s' to 'format'", form);
        }
      }
      lua_assert(nb < maxitem);
      luaL_addsize(&b, nb);  /* commit what was written into 'buff' */
    }
  }
  luaL_pushresult(&b);
  return 1;
}

/* }====================================================== */


/*
** {======================================================
** PACK/UNPACK
** =======================================================
*/


/* value used for padding */
#if !defined(LUAL_PACKPADBYTE)
#define LUAL_PACKPADBYTE		0x00
#endif

/* maximum size for the binary representation of an integer */
#define MAXINTSIZE	16

/* number of bits in a character */
#define NB	CHAR_BIT

/* mask for one character (NB 1's) */
#define MC	((1 << NB) - 1)

/* size of a lua_Integer */
#define SZINT	((int)sizeof(lua_Integer))


/* dummy union to get native endianness */
static const union {
  int dummy;
  char little;  /* true iff machine is little endian */
} nativeendian = {1};


/* dummy structure to get native alignment requirements */
struct cD {
  char c;
  union {
#if LUA_FLOAT_TYPE != LUA_FLOAT_INT64
    double d;
#endif
    void *p; lua_Integer i; lua_Number n; } u;
};

/* offset of the union after a single char: its alignment requirement */
#define MAXALIGN	(offsetof(struct cD, u))


/*
** Union for serializing floats
*/
typedef union Ftypes {
#if LUA_FLOAT_TYPE != LUA_FLOAT_INT64
  float f;
  double d;
#endif
  lua_Number n;
} Ftypes;


/*
** information to pack/unpack stuff
*/
typedef struct Header {
  lua_State *L;  /* state used to raise errors */
  int islittle;  /* current endianness (starts as the native one) */
  int maxalign;  /* current maximum alignment (starts as 1) */
} Header;


/*
** options for pack/unpack
*/
typedef enum KOption {
  Kint,		/* signed integers */
  Kuint,	/* unsigned integers */
  Kfloat,	/* floating-point numbers */
  Kchar,	/* fixed-length strings */
  Kstring,	/* strings with prefixed length */
  Kzstr,	/* zero-terminated strings */
  Kpadding,	/* padding */
  Kpaddalign,	/* padding for alignment */
  Knop		/* no-op (configuration or spaces) */
} KOption;


/* true iff 'c' is one of the characters '0' to '9' */
static int digit (int c) { return '0' <= c && c <= '9'; }

/*
** Read an integer numeral from string 'fmt' or return 'df' if
** there is no numeral. '*fmt' is advanced past the digits that were
** read. Reading stops early, leaving the remaining digits unread,
** once the value exceeds ((int)MAXSIZE - 9)/10, so that the next
** multiplication by 10 cannot overflow.
*/
static int getnum (const char **fmt, int df) {
  if (!digit(**fmt))  /* no number? */
    return df;  /* return default value */
  else {
    int a = 0;
    do {
      a = a*10 + (*((*fmt)++) - '0');
    } while (digit(**fmt) && a <= ((int)MAXSIZE - 9)/10);
    return a;
  }
}


/*
** Read an integer numeral (default 'df') and raise an error if it is
** not in the range [1, MAXINTSIZE].
*/
static int getnumlimit (Header *h, const char **fmt, int df) {
  int sz = getnum(fmt, df);
  if (sz > MAXINTSIZE || sz <= 0)
    return luaL_error(h->L, "integral size (%d) out of limits [1,%d]",
                            sz, MAXINTSIZE);
  return sz;
}


/*
** Initialize Header: native endianness, maximum alignment 1
*/
static void initheader (lua_State *L, Header *h) {
  h->L = L;
  h->islittle = nativeendian.little;
  h->maxalign = 1;
}


/*
** Read and classify next option. 'size' is filled with option's size.
1454 */ 1455 static KOption getoption (Header *h, const char **fmt, int *size) { 1456 int opt = *((*fmt)++); 1457 *size = 0; /* default */ 1458 switch (opt) { 1459 case 'b': *size = sizeof(char); return Kint; 1460 case 'B': *size = sizeof(char); return Kuint; 1461 case 'h': *size = sizeof(short); return Kint; 1462 case 'H': *size = sizeof(short); return Kuint; 1463 case 'l': *size = sizeof(long); return Kint; 1464 case 'L': *size = sizeof(long); return Kuint; 1465 case 'j': *size = sizeof(lua_Integer); return Kint; 1466 case 'J': *size = sizeof(lua_Integer); return Kuint; 1467 case 'T': *size = sizeof(size_t); return Kuint; 1468 #if LUA_FLOAT_TYPE != LUA_FLOAT_INT64 1469 case 'f': *size = sizeof(float); return Kfloat; 1470 case 'd': *size = sizeof(double); return Kfloat; 1471 #endif 1472 case 'n': *size = sizeof(lua_Number); return Kfloat; 1473 case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint; 1474 case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint; 1475 case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring; 1476 case 'c': 1477 *size = getnum(fmt, -1); 1478 if (*size == -1) 1479 luaL_error(h->L, "missing size for format option 'c'"); 1480 return Kchar; 1481 case 'z': return Kzstr; 1482 case 'x': *size = 1; return Kpadding; 1483 case 'X': return Kpaddalign; 1484 case ' ': break; 1485 case '<': h->islittle = 1; break; 1486 case '>': h->islittle = 0; break; 1487 case '=': h->islittle = nativeendian.little; break; 1488 case '!': h->maxalign = getnumlimit(h, fmt, MAXALIGN); break; 1489 default: luaL_error(h->L, "invalid format option '%c'", opt); 1490 } 1491 return Knop; 1492 } 1493 1494 1495 /* 1496 ** Read, classify, and fill other details about the next option. 1497 ** 'psize' is filled with option's size, 'notoalign' with its 1498 ** alignment requirements. 1499 ** Local variable 'size' gets the size to be aligned. 
** (For the Kpaddalign option the alignment comes from the option that
** follows 'X'. In every case the alignment is limited by the maximum
** alignment ('maxalign') and must then be a power of 2. Kchar option
** needs no alignment despite its size.)
*/
static KOption getdetails (Header *h, size_t totalsize,
                           const char **fmt, int *psize, int *ntoalign) {
  KOption opt = getoption(h, fmt, psize);
  int align = *psize;  /* usually, alignment follows size */
  if (opt == Kpaddalign) {  /* 'X' gets alignment from following option */
    if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0)
      luaL_argerror(h->L, 1, "invalid next option for option 'X'");
  }
  if (align <= 1 || opt == Kchar)  /* need no alignment? */
    *ntoalign = 0;
  else {
    if (align > h->maxalign)  /* enforce maximum alignment */
      align = h->maxalign;
    if ((align & (align - 1)) != 0)  /* is 'align' not a power of 2? */
      luaL_argerror(h->L, 1, "format asks for alignment not power of 2");
    /* bytes needed to bring 'totalsize' up to a multiple of 'align' */
    *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1);
  }
  return opt;
}


/*
** Pack integer 'n' with 'size' bytes and 'islittle' endianness.
** The final 'if' handles the case when 'size' is larger than
** the size of a Lua integer, correcting the extra sign-extension
** bytes if necessary (by default they would be zeros).
*/
static void packint (luaL_Buffer *b, lua_Unsigned n,
                     int islittle, int size, int neg) {
  char *buff = luaL_prepbuffsize(b, size);
  int i;
  buff[islittle ? 0 : size - 1] = (char)(n & MC);  /* first byte */
  for (i = 1; i < size; i++) {
    n >>= NB;
    buff[islittle ? i : size - 1 - i] = (char)(n & MC);
  }
  if (neg && size > SZINT) {  /* does a negative number need sign extension? */
    for (i = SZINT; i < size; i++)  /* correct extra bytes */
      buff[islittle ? i : size - 1 - i] = (char)MC;
  }
  luaL_addsize(b, size);  /* add result to buffer */
}


/*
** Copy 'size' bytes from 'src' to 'dest', correcting endianness if
** given 'islittle' is different from native endianness.
*/
static void copywithendian (char *dest, const char *src,
                            int size, int islittle) {
  if (islittle == nativeendian.little)
    memcpy(dest, src, size);
  else {  /* copy the bytes in reverse order */
    dest += size - 1;
    while (size-- != 0)
      *(dest--) = *(src++);
  }
}


/*
** Pack function of the library (registered as "pack" in 'strlib').
** Argument 1 is the format string; each option that carries a value
** takes the next argument. For every option, the alignment padding
** asked by 'getdetails' is written first and then the value itself.
** 'totalsize' tracks the number of bytes produced so far, which is what
** later alignments are computed from. Returns 1 (the packed string).
*/
static int str_pack (lua_State *L) {
  luaL_Buffer b;
  Header h;
  const char *fmt = luaL_checkstring(L, 1);  /* format string */
  int arg = 1;  /* current argument to pack */
  size_t totalsize = 0;  /* accumulate total size of result */
  initheader(L, &h);
  lua_pushnil(L);  /* mark to separate arguments from string buffer */
  luaL_buffinit(L, &b);
  while (*fmt != '\0') {
    int size, ntoalign;
    KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign);
    totalsize += ntoalign + size;
    while (ntoalign-- > 0)
     luaL_addchar(&b, LUAL_PACKPADBYTE);  /* fill alignment */
    arg++;  /* assume the option uses an argument (undone below if not) */
    switch (opt) {
      case Kint: {  /* signed integers */
        lua_Integer n = luaL_checkinteger(L, arg);
        if (size < SZINT) {  /* need overflow check? */
          lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1);
          luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow");
        }
        packint(&b, (lua_Unsigned)n, h.islittle, size, (n < 0));
        break;
      }
      case Kuint: {  /* unsigned integers */
        lua_Integer n = luaL_checkinteger(L, arg);
        if (size < SZINT)  /* need overflow check? */
          luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)),
                           arg, "unsigned overflow");
        packint(&b, (lua_Unsigned)n, h.islittle, size, 0);
        break;
      }
      case Kfloat: {  /* floating-point options */
        Ftypes u;
        char *buff = luaL_prepbuffsize(&b, size);
        lua_Number n = luaL_checknumber(L, arg);  /* get argument */
#if LUA_FLOAT_TYPE != LUA_FLOAT_INT64
        if (size == sizeof(u.f)) u.f = (float)n;  /* copy it into 'u' */
        else if (size == sizeof(u.d)) u.d = (double)n;
        else u.n = n;
#else
        u.n = n;
#endif
        /* move 'u' to final result, correcting endianness if needed */
        copywithendian(buff, (char *)&u, size, h.islittle);
        luaL_addsize(&b, size);
        break;
      }
      case Kchar: {  /* fixed-size string */
        size_t len;
        const char *s = luaL_checklstring(L, arg, &len);
        luaL_argcheck(L, len <= (size_t)size, arg,
                         "string longer than given size");
        luaL_addlstring(&b, s, len);  /* add string */
        while (len++ < (size_t)size)  /* pad extra space */
          luaL_addchar(&b, LUAL_PACKPADBYTE);
        break;
      }
      case Kstring: {  /* strings with length count */
        size_t len;
        const char *s = luaL_checklstring(L, arg, &len);
        luaL_argcheck(L, size >= (int)sizeof(size_t) ||
                         len < ((size_t)1 << (size * NB)),
                         arg, "string length does not fit in given size");
        packint(&b, (lua_Unsigned)len, h.islittle, size, 0);  /* pack length */
        luaL_addlstring(&b, s, len);
        totalsize += len;  /* 'size' (the length prefix) was already counted */
        break;
      }
      case Kzstr: {  /* zero-terminated string */
        size_t len;
        const char *s = luaL_checklstring(L, arg, &len);
        luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros");
        luaL_addlstring(&b, s, len);
        luaL_addchar(&b, '\0');  /* add zero at the end */
        totalsize += len + 1;
        break;
      }
      case Kpadding: luaL_addchar(&b, LUAL_PACKPADBYTE);  /* FALLTHROUGH */
      case Kpaddalign: case Knop:
        arg--;  /* undo increment */
        break;
    }
  }
  luaL_pushresult(&b);
  return 1;
}


/*
** Packsize function of the library (registered as "packsize" in
** 'strlib'). Adds up size plus alignment of every option in the format
** string (argument 1) without packing anything. Options of variable
** length (Kstring and Kzstr) are rejected, and so is a total that would
** go beyond MAXSIZE. Returns 1 (the total size).
*/
static int str_packsize (lua_State *L) {
  Header h;
  const char *fmt = luaL_checkstring(L, 1);  /* format string */
  size_t totalsize = 0;  /* accumulate total size of result */
  initheader(L, &h);
  while (*fmt != '\0') {
    int size, ntoalign;
    KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign);
    luaL_argcheck(L, opt != Kstring && opt != Kzstr, 1,
                     "variable-length format");
    size += ntoalign;  /* total space used by option */
    luaL_argcheck(L, totalsize <= MAXSIZE - size, 1,
                     "format result too large");
    totalsize += size;
  }
  lua_pushinteger(L, (lua_Integer)totalsize);
  return 1;
}


/*
** Unpack an integer with 'size' bytes and 'islittle' endianness.
** If size is smaller than the size of a Lua integer and integer
** is signed, must do sign extension (propagating the sign to the
** higher bits); if size is larger than the size of a Lua integer,
** it must check the unread bytes to see whether they do not cause an
** overflow.
*/
static lua_Integer unpackint (lua_State *L, const char *str,
                              int islittle, int size, int issigned) {
  lua_Unsigned res = 0;
  int i;
  int limit = (size <= SZINT) ? size : SZINT;  /* bytes that fit in 'res' */
  for (i = limit - 1; i >= 0; i--) {  /* from most to least significant */
    res <<= NB;
    res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i];
  }
  if (size < SZINT) {  /* real size smaller than lua_Integer? */
    if (issigned) {  /* needs sign extension? */
      lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1);
      res = ((res ^ mask) - mask);  /* do sign extension */
    }
  }
  else if (size > SZINT) {  /* must check unread bytes */
    /* expected value of each extra byte: all ones only for negatives */
    int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC;
    for (i = limit; i < size; i++) {
      if ((unsigned char)str[islittle ? i : size - 1 - i] != mask)
        luaL_error(L, "%d-byte integer does not fit into Lua Integer", size);
    }
  }
  return (lua_Integer)res;
}


/*
** Unpack function of the library (registered as "unpack" in 'strlib').
** Argument 1 is the format string, argument 2 the data string, and the
** optional argument 3 the initial position (default 1; translated by
** 'posrelatI'). One value is pushed for each option that carries a
** value, followed by the position right after the last item read.
** Returns the number of pushed values.
*/
static int str_unpack (lua_State *L) {
  Header h;
  const char *fmt = luaL_checkstring(L, 1);
  size_t ld;
  const char *data = luaL_checklstring(L, 2, &ld);
  size_t pos = posrelatI(luaL_optinteger(L, 3, 1), ld) - 1;  /* 0-based */
  int n = 0;  /* number of results */
  luaL_argcheck(L, pos <= ld, 3, "initial position out of string");
  initheader(L, &h);
  while (*fmt != '\0') {
    int size, ntoalign;
    KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign);
    luaL_argcheck(L, (size_t)ntoalign + size <= ld - pos, 2,
                    "data string too short");
    pos += ntoalign;  /* skip alignment */
    /* stack space for item + next position */
    luaL_checkstack(L, 2, "too many results");
    n++;  /* assume the option produces a result (undone below if not) */
    switch (opt) {
      case Kint:
      case Kuint: {
        lua_Integer res = unpackint(L, data + pos, h.islittle, size,
                                       (opt == Kint));
        lua_pushinteger(L, res);
        break;
      }
      case Kfloat: {
        Ftypes u;
        lua_Number num;
        copywithendian((char *)&u, data + pos, size, h.islittle);
#if LUA_FLOAT_TYPE != LUA_FLOAT_INT64
        if (size == sizeof(u.f)) num = (lua_Number)u.f;
        else if (size == sizeof(u.d)) num = (lua_Number)u.d;
        else num = u.n;
#else
        num = u.n;
#endif
        lua_pushnumber(L, num);
        break;
      }
      case Kchar: {
        lua_pushlstring(L, data + pos, size);
        break;
      }
      case Kstring: {
        size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0);
        luaL_argcheck(L, len <= ld - pos - size, 2, "data string too short");
        lua_pushlstring(L, data + pos + size, len);
        pos += len;  /* skip string */
        break;
      }
      case Kzstr: {
        size_t len = strlen(data + pos);
        luaL_argcheck(L, pos + len < ld, 2,
                         "unfinished string for format 'z'");
        lua_pushlstring(L, data + pos, len);
        pos += len + 1;  /* skip string plus final '\0' */
        break;
      }
      case Kpaddalign: case Kpadding: case Knop:
        n--;  /* undo increment */
        break;
    }
    pos += size;  /* skip the item itself (or the length prefix) */
  }
  lua_pushinteger(L, pos + 1);  /* next position */
  return n + 1;
}

/* }====================================================== */


/* functions of the library, by the name they are registered under */
static const luaL_Reg strlib[] = {
  {"byte", str_byte},
  {"char", str_char},
  {"dump", str_dump},
  {"find", str_find},
  {"format", str_format},
  {"gmatch", gmatch},
  {"gsub", str_gsub},
  {"len", str_len},
  {"lower", str_lower},
  {"match", str_match},
  {"rep", str_rep},
  {"reverse", str_reverse},
  {"sub", str_sub},
  {"upper", str_upper},
  {"pack", str_pack},
  {"packsize", str_packsize},
  {"unpack", str_unpack},
  {NULL, NULL}
};


/*
** Create the metatable for strings, filled with 'stringmetamethods',
** and set its '__index' field to the library table, which is expected
** right below it on the stack. The stack is left as it was found.
*/
static void createmetatable (lua_State *L) {
  /* table to be metatable for strings */
  luaL_newlibtable(L, stringmetamethods);
  luaL_setfuncs(L, stringmetamethods, 0);
  lua_pushliteral(L, "");  /* dummy string */
  lua_pushvalue(L, -2);  /* copy table */
  lua_setmetatable(L, -2);  /* set table as metatable for strings */
  lua_pop(L, 1);  /* pop dummy string */
  lua_pushvalue(L, -2);  /* get string library */
  lua_setfield(L, -2, "__index");  /* metatable.__index = string */
  lua_pop(L, 1);  /* pop metatable */
}


/*
** Open string library
*/
LUAMOD_API int luaopen_string (lua_State *L) {
  luaL_newlib(L, strlib);
  createmetatable(L);
  return 1;
}