1 /* 2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 using System; 26 using System.Collections.Generic; 27 using System.IO; 28 using System.Reflection; 29 using System.Text; 30 31 /* 32 * This is the main compiler class. 33 */ 34 35 public class T0Comp { 36 37 /* 38 * Command-line entry point. 39 */ 40 public static void Main(string[] args) 41 { 42 try { 43 List<string> r = new List<string>(); 44 string outBase = null; 45 List<string> entryPoints = new List<string>(); 46 string coreRun = null; 47 bool flow = true; 48 int dsLim = 32; 49 int rsLim = 32; 50 for (int i = 0; i < args.Length; i ++) { 51 string a = args[i]; 52 if (!a.StartsWith("-")) { 53 r.Add(a); 54 continue; 55 } 56 if (a == "--") { 57 for (;;) { 58 if (++ i >= args.Length) { 59 break; 60 } 61 r.Add(args[i]); 62 } 63 break; 64 } 65 while (a.StartsWith("-")) { 66 a = a.Substring(1); 67 } 68 int j = a.IndexOf('='); 69 string pname; 70 string pval, pval2; 71 if (j < 0) { 72 pname = a.ToLowerInvariant(); 73 pval = null; 74 pval2 = (i + 1) < args.Length 75 ? args[i + 1] : null; 76 } else { 77 pname = a.Substring(0, j).Trim() 78 .ToLowerInvariant(); 79 pval = a.Substring(j + 1); 80 pval2 = null; 81 } 82 switch (pname) { 83 case "o": 84 case "out": 85 if (pval == null) { 86 if (pval2 == null) { 87 Usage(); 88 } 89 i ++; 90 pval = pval2; 91 } 92 if (outBase != null) { 93 Usage(); 94 } 95 outBase = pval; 96 break; 97 case "r": 98 case "run": 99 if (pval == null) { 100 if (pval2 == null) { 101 Usage(); 102 } 103 i ++; 104 pval = pval2; 105 } 106 coreRun = pval; 107 break; 108 case "m": 109 case "main": 110 if (pval == null) { 111 if (pval2 == null) { 112 Usage(); 113 } 114 i ++; 115 pval = pval2; 116 } 117 foreach (string ep in pval.Split(',')) { 118 string epz = ep.Trim(); 119 if (epz.Length > 0) { 120 entryPoints.Add(epz); 121 } 122 } 123 break; 124 case "nf": 125 case "noflow": 126 flow = false; 127 break; 128 default: 129 Usage(); 130 break; 131 } 132 } 133 if (r.Count == 0) { 134 Usage(); 135 } 136 if (outBase == null) { 137 outBase = "t0out"; 138 } 139 if (entryPoints.Count == 0) { 140 entryPoints.Add("main"); 141 } 142 if (coreRun == null) { 143 coreRun = outBase; 144 } 145 T0Comp tc = new T0Comp(); 146 tc.enableFlowAnalysis = flow; 147 tc.dsLimit = dsLim; 148 tc.rsLimit = rsLim; 149 using (TextReader tr = new StreamReader( 150 Assembly.GetExecutingAssembly() 151 .GetManifestResourceStream("t0-kernel"))) 152 { 153 tc.ProcessInput(tr); 154 } 155 foreach (string a in r) { 156 Console.WriteLine("[{0}]", a); 157 using (TextReader tr = File.OpenText(a)) { 158 tc.ProcessInput(tr); 159 } 160 } 161 tc.Generate(outBase, coreRun, entryPoints.ToArray()); 162 } catch (Exception e) { 163 Console.WriteLine(e.ToString()); 164 Environment.Exit(1); 165 } 166 } 167 168 static void Usage() 169 { 170 Console.WriteLine( 171 "usage: T0Comp.exe [ options... ] file..."); 172 Console.WriteLine( 173 "options:"); 174 Console.WriteLine( 175 " -o file use 'file' as base for output file name (default: 't0out')"); 176 Console.WriteLine( 177 " -r name use 'name' as base for run function (default: same as output)"); 178 Console.WriteLine( 179 " -m name[,name...]"); 180 Console.WriteLine( 181 " define entry point(s)"); 182 Console.WriteLine( 183 " -nf disable flow analysis"); 184 Environment.Exit(1); 185 } 186 187 /* 188 * If 'delayedChar' is Int32.MinValue then there is no delayed 189 * character. 190 * If 'delayedChar' equals x >= 0 then there is one delayed 191 * character of value x. 192 * If 'delayedChar' equals y < 0 then there are two delayed 193 * characters, a newline (U+000A) followed by character of 194 * value -(y+1). 195 */ 196 TextReader currentInput; 197 int delayedChar; 198 199 /* 200 * Common StringBuilder used to parse tokens; it is reused for 201 * each new token. 202 */ 203 StringBuilder tokenBuilder; 204 205 /* 206 * There may be a delayed token in some cases. 207 */ 208 String delayedToken; 209 210 /* 211 * Defined words are referenced by name in this map. Names are 212 * string-sensitive; for better reproducibility, the map is sorted 213 * (ordinal order). 214 */ 215 IDictionary<string, Word> words; 216 217 /* 218 * Last defined word is also referenced in 'lastWord'. This is 219 * used by 'immediate'. 220 */ 221 Word lastWord; 222 223 /* 224 * When compiling, this builder is used. A stack saves other 225 * builders in case of nested definition. 226 */ 227 WordBuilder wordBuilder; 228 Stack<WordBuilder> savedWordBuilders; 229 230 /* 231 * C code defined for words is kept in this map, by word name. 232 */ 233 IDictionary<string, string> allCCode; 234 235 /* 236 * 'compiling' is true when compiling tokens to a word, false 237 * when interpreting them. 238 */ 239 bool compiling; 240 241 /* 242 * 'quitRunLoop' is set to true to trigger exit of the 243 * interpretation loop when the end of the current input file 244 * is reached. 245 */ 246 bool quitRunLoop; 247 248 /* 249 * 'extraCode' is for C code that is to be added as preamble to 250 * the C output. 251 */ 252 List<string> extraCode; 253 254 /* 255 * 'extraCodeDefer' is for C code that is to be added in the C 256 * output _after_ the data and code blocks. 257 */ 258 List<string> extraCodeDefer; 259 260 /* 261 * 'dataBlock' is the data block in which constant data bytes 262 * are accumulated. 263 */ 264 ConstData dataBlock; 265 266 /* 267 * Counter for blocks of constant data. 268 */ 269 long currentBlobID; 270 271 /* 272 * Flow analysis enable flag. 273 */ 274 bool enableFlowAnalysis; 275 276 /* 277 * Data stack size limit. 278 */ 279 int dsLimit; 280 281 /* 282 * Return stack size limit. 283 */ 284 int rsLimit; 285 286 T0Comp() 287 { 288 tokenBuilder = new StringBuilder(); 289 words = new SortedDictionary<string, Word>( 290 StringComparer.Ordinal); 291 savedWordBuilders = new Stack<WordBuilder>(); 292 allCCode = new SortedDictionary<string, string>( 293 StringComparer.Ordinal); 294 compiling = false; 295 extraCode = new List<string>(); 296 extraCodeDefer = new List<string>(); 297 enableFlowAnalysis = true; 298 299 /* 300 * Native words are predefined and implemented only with 301 * native code. Some may be part of the generated output, 302 * if C code is set for them. 303 */ 304 305 /* 306 * add-cc: 307 * Parses next token as a word name, then a C code snippet. 308 * Sets the C code for that word. 309 */ 310 AddNative("add-cc:", false, SType.BLANK, cpu => { 311 string tt = Next(); 312 if (tt == null) { 313 throw new Exception( 314 "EOF reached (missing name)"); 315 } 316 if (allCCode.ContainsKey(tt)) { 317 throw new Exception( 318 "C code already set for: " + tt); 319 } 320 allCCode[tt] = ParseCCode(); 321 }); 322 323 /* 324 * cc: 325 * Parses next token as a word name, then a C code snippet. 326 * A new word is defined, that throws an exception when 327 * invoked during compilation. The C code is set for that 328 * new word. 329 */ 330 AddNative("cc:", false, SType.BLANK, cpu => { 331 string tt = Next(); 332 if (tt == null) { 333 throw new Exception( 334 "EOF reached (missing name)"); 335 } 336 Word w = AddNative(tt, false, cpu2 => { 337 throw new Exception( 338 "C-only word: " + tt); 339 }); 340 if (allCCode.ContainsKey(tt)) { 341 throw new Exception( 342 "C code already set for: " + tt); 343 } 344 SType stackEffect; 345 allCCode[tt] = ParseCCode(out stackEffect); 346 w.StackEffect = stackEffect; 347 }); 348 349 /* 350 * preamble 351 * Parses a C code snippet, then adds it to the generated 352 * output preamble. 353 */ 354 AddNative("preamble", false, SType.BLANK, cpu => { 355 extraCode.Add(ParseCCode()); 356 }); 357 358 /* 359 * postamble 360 * Parses a C code snippet, then adds it to the generated 361 * output after the data and code blocks. 362 */ 363 AddNative("postamble", false, SType.BLANK, cpu => { 364 extraCodeDefer.Add(ParseCCode()); 365 }); 366 367 /* 368 * make-CX 369 * Expects two integers and a string, and makes a 370 * constant that stands for the string as a C constant 371 * expression. The two integers are the expected range 372 * (min-max, inclusive). 373 */ 374 AddNative("make-CX", false, new SType(3, 1), cpu => { 375 TValue c = cpu.Pop(); 376 if (!(c.ptr is TPointerBlob)) { 377 throw new Exception(string.Format( 378 "'{0}' is not a string", c)); 379 } 380 int max = cpu.Pop(); 381 int min = cpu.Pop(); 382 TValue tv = new TValue(0, new TPointerExpr( 383 c.ToString(), min, max)); 384 cpu.Push(tv); 385 }); 386 387 /* 388 * CX (immediate) 389 * Parses two integer constants, then a C code snippet. 390 * It then pushes on the stack, or compiles to the 391 * current word, a value consisting of the given C 392 * expression; the two integers indicate the expected 393 * range (min-max, inclusive) of the C expression when 394 * evaluated. 395 */ 396 AddNative("CX", true, cpu => { 397 string tt = Next(); 398 if (tt == null) { 399 throw new Exception( 400 "EOF reached (missing min value)"); 401 } 402 int min = ParseInteger(tt); 403 tt = Next(); 404 if (tt == null) { 405 throw new Exception( 406 "EOF reached (missing max value)"); 407 } 408 int max = ParseInteger(tt); 409 if (max < min) { 410 throw new Exception("min/max in wrong order"); 411 } 412 TValue tv = new TValue(0, new TPointerExpr( 413 ParseCCode().Trim(), min, max)); 414 if (compiling) { 415 wordBuilder.Literal(tv); 416 } else { 417 cpu.Push(tv); 418 } 419 }); 420 421 /* 422 * co 423 * Interrupt the current execution. This implements 424 * coroutines. It cannot be invoked during compilation. 425 */ 426 AddNative("co", false, SType.BLANK, cpu => { 427 throw new Exception("No coroutine in compile mode"); 428 }); 429 430 /* 431 * : 432 * Parses next token as word name. It begins definition 433 * of that word, setting it as current target for 434 * word building. Any previously opened word is saved 435 * and will become available again as a target when that 436 * new word is finished building. 437 */ 438 AddNative(":", false, cpu => { 439 string tt = Next(); 440 if (tt == null) { 441 throw new Exception( 442 "EOF reached (missing name)"); 443 } 444 if (compiling) { 445 savedWordBuilders.Push(wordBuilder); 446 } else { 447 compiling = true; 448 } 449 wordBuilder = new WordBuilder(this, tt); 450 tt = Next(); 451 if (tt == null) { 452 throw new Exception( 453 "EOF reached (while compiling)"); 454 } 455 if (tt == "(") { 456 SType stackEffect = ParseStackEffectNF(); 457 if (!stackEffect.IsKnown) { 458 throw new Exception( 459 "Invalid stack effect syntax"); 460 } 461 wordBuilder.StackEffect = stackEffect; 462 } else { 463 delayedToken = tt; 464 } 465 }); 466 467 /* 468 * Pops a string as word name, and two integers as stack 469 * effect. It begins definition of that word, setting it 470 * as current target for word building. Any previously 471 * opened word is saved and will become available again as 472 * a target when that new word is finished building. 473 * 474 * Stack effect is the pair 'din dout'. If din is negative, 475 * then the stack effect is "unknown". If din is nonnegative 476 * but dout is negative, then the word is reputed never to 477 * return. 478 */ 479 AddNative("define-word", false, cpu => { 480 int dout = cpu.Pop(); 481 int din = cpu.Pop(); 482 TValue s = cpu.Pop(); 483 if (!(s.ptr is TPointerBlob)) { 484 throw new Exception(string.Format( 485 "Not a string: '{0}'", s)); 486 } 487 string tt = s.ToString(); 488 if (compiling) { 489 savedWordBuilders.Push(wordBuilder); 490 } else { 491 compiling = true; 492 } 493 wordBuilder = new WordBuilder(this, tt); 494 wordBuilder.StackEffect = new SType(din, dout); 495 }); 496 497 /* 498 * ; (immediate) 499 * Ends current word. The current word is registered under 500 * its name, and the previously opened word (if any) becomes 501 * again the building target. 502 */ 503 AddNative(";", true, cpu => { 504 if (!compiling) { 505 throw new Exception("Not compiling"); 506 } 507 Word w = wordBuilder.Build(); 508 string name = w.Name; 509 if (words.ContainsKey(name)) { 510 throw new Exception( 511 "Word already defined: " + name); 512 } 513 words[name] = w; 514 lastWord = w; 515 if (savedWordBuilders.Count > 0) { 516 wordBuilder = savedWordBuilders.Pop(); 517 } else { 518 wordBuilder = null; 519 compiling = false; 520 } 521 }); 522 523 /* 524 * immediate 525 * Sets the last defined word as immediate. 526 */ 527 AddNative("immediate", false, cpu => { 528 if (lastWord == null) { 529 throw new Exception("No word defined yet"); 530 } 531 lastWord.Immediate = true; 532 }); 533 534 /* 535 * literal (immediate) 536 * Pops the current TOS value, and add in the current word 537 * the action of pushing that value. This cannot be used 538 * when no word is being built. 539 */ 540 WordNative wliteral = AddNative("literal", true, cpu => { 541 CheckCompiling(); 542 wordBuilder.Literal(cpu.Pop()); 543 }); 544 545 /* 546 * compile 547 * Pops the current TOS value, which must be an XT (pointer 548 * to a word); the action of calling that word is compiled 549 * in the current word. 550 */ 551 WordNative wcompile = AddNative("compile", false, cpu => { 552 CheckCompiling(); 553 wordBuilder.Call(cpu.Pop().ToXT()); 554 }); 555 556 /* 557 * postpone (immediate) 558 * Parses the next token as a word name, and add to the 559 * current word the action of calling that word. This 560 * basically removes immediatety from the next word. 561 */ 562 AddNative("postpone", true, cpu => { 563 CheckCompiling(); 564 string tt = Next(); 565 if (tt == null) { 566 throw new Exception( 567 "EOF reached (missing name)"); 568 } 569 TValue v; 570 bool isVal = TryParseLiteral(tt, out v); 571 Word w = LookupNF(tt); 572 if (isVal && w != null) { 573 throw new Exception(String.Format( 574 "Ambiguous: both defined word and" 575 + " literal: {0}", tt)); 576 } 577 if (isVal) { 578 wordBuilder.Literal(v); 579 wordBuilder.CallExt(wliteral); 580 } else if (w != null) { 581 if (w.Immediate) { 582 wordBuilder.CallExt(w); 583 } else { 584 wordBuilder.Literal(new TValue(0, 585 new TPointerXT(w))); 586 wordBuilder.CallExt(wcompile); 587 } 588 } else { 589 wordBuilder.Literal(new TValue(0, 590 new TPointerXT(tt))); 591 wordBuilder.CallExt(wcompile); 592 } 593 }); 594 595 /* 596 * Interrupt compilation with an error. 597 */ 598 AddNative("exitvm", false, cpu => { 599 throw new Exception(); 600 }); 601 602 /* 603 * Open a new data block. Its symbolic address is pushed 604 * on the stack. 605 */ 606 AddNative("new-data-block", false, cpu => { 607 dataBlock = new ConstData(this); 608 cpu.Push(new TValue(0, new TPointerBlob(dataBlock))); 609 }); 610 611 /* 612 * Define a new data word. The data address and name are 613 * popped from the stack. 614 */ 615 AddNative("define-data-word", false, cpu => { 616 string name = cpu.Pop().ToString(); 617 TValue va = cpu.Pop(); 618 TPointerBlob tb = va.ptr as TPointerBlob; 619 if (tb == null) { 620 throw new Exception( 621 "Address is not a data area"); 622 } 623 Word w = new WordData(this, name, tb.Blob, va.x); 624 if (words.ContainsKey(name)) { 625 throw new Exception( 626 "Word already defined: " + name); 627 } 628 words[name] = w; 629 lastWord = w; 630 }); 631 632 /* 633 * Get an address pointing at the end of the current 634 * data block. This is the address of the next byte that 635 * will be added. 636 */ 637 AddNative("current-data", false, cpu => { 638 if (dataBlock == null) { 639 throw new Exception( 640 "No current data block"); 641 } 642 cpu.Push(new TValue(dataBlock.Length, 643 new TPointerBlob(dataBlock))); 644 }); 645 646 /* 647 * Add a byte value to the data block. 648 */ 649 AddNative("data-add8", false, cpu => { 650 if (dataBlock == null) { 651 throw new Exception( 652 "No current data block"); 653 } 654 int v = cpu.Pop(); 655 if (v < 0 || v > 0xFF) { 656 throw new Exception( 657 "Byte value out of range: " + v); 658 } 659 dataBlock.Add8((byte)v); 660 }); 661 662 /* 663 * Set a byte value in the data block. 664 */ 665 AddNative("data-set8", false, cpu => { 666 TValue va = cpu.Pop(); 667 TPointerBlob tb = va.ptr as TPointerBlob; 668 if (tb == null) { 669 throw new Exception( 670 "Address is not a data area"); 671 } 672 int v = cpu.Pop(); 673 if (v < 0 || v > 0xFF) { 674 throw new Exception( 675 "Byte value out of range: " + v); 676 } 677 tb.Blob.Set8(va.x, (byte)v); 678 }); 679 680 /* 681 * Get a byte value from a data block. 682 */ 683 AddNative("data-get8", false, new SType(1, 1), cpu => { 684 TValue va = cpu.Pop(); 685 TPointerBlob tb = va.ptr as TPointerBlob; 686 if (tb == null) { 687 throw new Exception( 688 "Address is not a data area"); 689 } 690 int v = tb.Blob.Read8(va.x); 691 cpu.Push(v); 692 }); 693 694 /* 695 * 696 */ 697 AddNative("compile-local-read", false, cpu => { 698 CheckCompiling(); 699 wordBuilder.GetLocal(cpu.Pop().ToString()); 700 }); 701 AddNative("compile-local-write", false, cpu => { 702 CheckCompiling(); 703 wordBuilder.PutLocal(cpu.Pop().ToString()); 704 }); 705 706 AddNative("ahead", true, cpu => { 707 CheckCompiling(); 708 wordBuilder.Ahead(); 709 }); 710 AddNative("begin", true, cpu => { 711 CheckCompiling(); 712 wordBuilder.Begin(); 713 }); 714 AddNative("again", true, cpu => { 715 CheckCompiling(); 716 wordBuilder.Again(); 717 }); 718 AddNative("until", true, cpu => { 719 CheckCompiling(); 720 wordBuilder.AgainIfNot(); 721 }); 722 AddNative("untilnot", true, cpu => { 723 CheckCompiling(); 724 wordBuilder.AgainIf(); 725 }); 726 AddNative("if", true, cpu => { 727 CheckCompiling(); 728 wordBuilder.AheadIfNot(); 729 }); 730 AddNative("ifnot", true, cpu => { 731 CheckCompiling(); 732 wordBuilder.AheadIf(); 733 }); 734 AddNative("then", true, cpu => { 735 CheckCompiling(); 736 wordBuilder.Then(); 737 }); 738 AddNative("cs-pick", false, cpu => { 739 CheckCompiling(); 740 wordBuilder.CSPick(cpu.Pop()); 741 }); 742 AddNative("cs-roll", false, cpu => { 743 CheckCompiling(); 744 wordBuilder.CSRoll(cpu.Pop()); 745 }); 746 AddNative("next-word", false, cpu => { 747 string s = Next(); 748 if (s == null) { 749 throw new Exception("No next word (EOF)"); 750 } 751 cpu.Push(StringToBlob(s)); 752 }); 753 AddNative("parse", false, cpu => { 754 int d = cpu.Pop(); 755 string s = ReadTerm(d); 756 cpu.Push(StringToBlob(s)); 757 }); 758 AddNative("char", false, cpu => { 759 int c = NextChar(); 760 if (c < 0) { 761 throw new Exception("No next character (EOF)"); 762 } 763 cpu.Push(c); 764 }); 765 AddNative("'", false, cpu => { 766 string name = Next(); 767 cpu.Push(new TValue(0, new TPointerXT(name))); 768 }); 769 770 /* 771 * The "execute" word is valid in generated C code, but 772 * since it jumps to a runtime pointer, its actual stack 773 * effect cannot be computed in advance. 774 */ 775 AddNative("execute", false, cpu => { 776 cpu.Pop().Execute(this, cpu); 777 }); 778 779 AddNative("[", true, cpu => { 780 CheckCompiling(); 781 compiling = false; 782 }); 783 AddNative("]", false, cpu => { 784 compiling = true; 785 }); 786 AddNative("(local)", false, cpu => { 787 CheckCompiling(); 788 wordBuilder.DefLocal(cpu.Pop().ToString()); 789 }); 790 AddNative("ret", true, cpu => { 791 CheckCompiling(); 792 wordBuilder.Ret(); 793 }); 794 795 AddNative("drop", false, new SType(1, 0), cpu => { 796 cpu.Pop(); 797 }); 798 AddNative("dup", false, new SType(1, 2), cpu => { 799 cpu.Push(cpu.Peek(0)); 800 }); 801 AddNative("swap", false, new SType(2, 2), cpu => { 802 cpu.Rot(1); 803 }); 804 AddNative("over", false, new SType(2, 3), cpu => { 805 cpu.Push(cpu.Peek(1)); 806 }); 807 AddNative("rot", false, new SType(3, 3), cpu => { 808 cpu.Rot(2); 809 }); 810 AddNative("-rot", false, new SType(3, 3), cpu => { 811 cpu.NRot(2); 812 }); 813 814 /* 815 * "roll" and "pick" are special in that the stack slot 816 * they inspect might be known only at runtime, so an 817 * absolute stack effect cannot be attributed. Instead, 818 * we simply hope that the caller knows what it is doing, 819 * and we use a simple stack effect for just the count 820 * value and picked value. 821 */ 822 AddNative("roll", false, new SType(1, 0), cpu => { 823 cpu.Rot(cpu.Pop()); 824 }); 825 AddNative("pick", false, new SType(1, 1), cpu => { 826 cpu.Push(cpu.Peek(cpu.Pop())); 827 }); 828 829 AddNative("+", false, new SType(2, 1), cpu => { 830 TValue b = cpu.Pop(); 831 TValue a = cpu.Pop(); 832 if (b.ptr == null) { 833 a.x += (int)b; 834 cpu.Push(a); 835 } else if (a.ptr is TPointerBlob 836 && b.ptr is TPointerBlob) 837 { 838 cpu.Push(StringToBlob( 839 a.ToString() + b.ToString())); 840 } else { 841 throw new Exception(string.Format( 842 "Cannot add '{0}' to '{1}'", b, a)); 843 } 844 }); 845 AddNative("-", false, new SType(2, 1), cpu => { 846 /* 847 * We can subtract two pointers, provided that 848 * they point to the same blob. Otherwise, 849 * the subtraction second operand must be an 850 * integer. 851 */ 852 TValue b = cpu.Pop(); 853 TValue a = cpu.Pop(); 854 TPointerBlob ap = a.ptr as TPointerBlob; 855 TPointerBlob bp = b.ptr as TPointerBlob; 856 if (ap != null && bp != null && ap.Blob == bp.Blob) { 857 cpu.Push(new TValue(a.x - b.x)); 858 return; 859 } 860 int bx = b; 861 a.x -= bx; 862 cpu.Push(a); 863 }); 864 AddNative("neg", false, new SType(1, 1), cpu => { 865 int ax = cpu.Pop(); 866 cpu.Push(-ax); 867 }); 868 AddNative("*", false, new SType(2, 1), cpu => { 869 int bx = cpu.Pop(); 870 int ax = cpu.Pop(); 871 cpu.Push(ax * bx); 872 }); 873 AddNative("/", false, new SType(2, 1), cpu => { 874 int bx = cpu.Pop(); 875 int ax = cpu.Pop(); 876 cpu.Push(ax / bx); 877 }); 878 AddNative("u/", false, new SType(2, 1), cpu => { 879 uint bx = cpu.Pop(); 880 uint ax = cpu.Pop(); 881 cpu.Push(ax / bx); 882 }); 883 AddNative("%", false, new SType(2, 1), cpu => { 884 int bx = cpu.Pop(); 885 int ax = cpu.Pop(); 886 cpu.Push(ax % bx); 887 }); 888 AddNative("u%", false, new SType(2, 1), cpu => { 889 uint bx = cpu.Pop(); 890 uint ax = cpu.Pop(); 891 cpu.Push(ax % bx); 892 }); 893 AddNative("<", false, new SType(2, 1), cpu => { 894 int bx = cpu.Pop(); 895 int ax = cpu.Pop(); 896 cpu.Push(ax < bx); 897 }); 898 AddNative("<=", false, new SType(2, 1), cpu => { 899 int bx = cpu.Pop(); 900 int ax = cpu.Pop(); 901 cpu.Push(ax <= bx); 902 }); 903 AddNative(">", false, new SType(2, 1), cpu => { 904 int bx = cpu.Pop(); 905 int ax = cpu.Pop(); 906 cpu.Push(ax > bx); 907 }); 908 AddNative(">=", false, new SType(2, 1), cpu => { 909 int bx = cpu.Pop(); 910 int ax = cpu.Pop(); 911 cpu.Push(ax >= bx); 912 }); 913 AddNative("=", false, new SType(2, 1), cpu => { 914 TValue b = cpu.Pop(); 915 TValue a = cpu.Pop(); 916 cpu.Push(a.Equals(b)); 917 }); 918 AddNative("<>", false, new SType(2, 1), cpu => { 919 TValue b = cpu.Pop(); 920 TValue a = cpu.Pop(); 921 cpu.Push(!a.Equals(b)); 922 }); 923 AddNative("u<", false, new SType(2, 1), cpu => { 924 uint bx = cpu.Pop().UInt; 925 uint ax = cpu.Pop().UInt; 926 cpu.Push(new TValue(ax < bx)); 927 }); 928 AddNative("u<=", false, new SType(2, 1), cpu => { 929 uint bx = cpu.Pop().UInt; 930 uint ax = cpu.Pop().UInt; 931 cpu.Push(new TValue(ax <= bx)); 932 }); 933 AddNative("u>", false, new SType(2, 1), cpu => { 934 uint bx = cpu.Pop().UInt; 935 uint ax = cpu.Pop().UInt; 936 cpu.Push(new TValue(ax > bx)); 937 }); 938 AddNative("u>=", false, new SType(2, 1), cpu => { 939 uint bx = cpu.Pop(); 940 uint ax = cpu.Pop(); 941 cpu.Push(ax >= bx); 942 }); 943 AddNative("and", false, new SType(2, 1), cpu => { 944 uint bx = cpu.Pop(); 945 uint ax = cpu.Pop(); 946 cpu.Push(ax & bx); 947 }); 948 AddNative("or", false, new SType(2, 1), cpu => { 949 uint bx = cpu.Pop(); 950 uint ax = cpu.Pop(); 951 cpu.Push(ax | bx); 952 }); 953 AddNative("xor", false, new SType(2, 1), cpu => { 954 uint bx = cpu.Pop(); 955 uint ax = cpu.Pop(); 956 cpu.Push(ax ^ bx); 957 }); 958 AddNative("not", false, new SType(1, 1), cpu => { 959 uint ax = cpu.Pop(); 960 cpu.Push(~ax); 961 }); 962 AddNative("<<", false, new SType(2, 1), cpu => { 963 int count = cpu.Pop(); 964 if (count < 0 || count > 31) { 965 throw new Exception("Invalid shift count"); 966 } 967 uint ax = cpu.Pop(); 968 cpu.Push(ax << count); 969 }); 970 AddNative(">>", false, new SType(2, 1), cpu => { 971 int count = cpu.Pop(); 972 if (count < 0 || count > 31) { 973 throw new Exception("Invalid shift count"); 974 } 975 int ax = cpu.Pop(); 976 cpu.Push(ax >> count); 977 }); 978 AddNative("u>>", false, new SType(2, 1), cpu => { 979 int count = cpu.Pop(); 980 if (count < 0 || count > 31) { 981 throw new Exception("Invalid shift count"); 982 } 983 uint ax = cpu.Pop(); 984 cpu.Push(ax >> count); 985 }); 986 987 AddNative(".", false, new SType(1, 0), cpu => { 988 Console.Write(" {0}", cpu.Pop().ToString()); 989 }); 990 AddNative(".s", false, SType.BLANK, cpu => { 991 int n = cpu.Depth; 992 for (int i = n - 1; i >= 0; i --) { 993 Console.Write(" {0}", cpu.Peek(i).ToString()); 994 } 995 }); 996 AddNative("putc", false, new SType(1, 0), cpu => { 997 Console.Write((char)cpu.Pop()); 998 }); 999 AddNative("puts", false, new SType(1, 0), cpu => { 1000 Console.Write("{0}", cpu.Pop().ToString()); 1001 }); 1002 AddNative("cr", false, SType.BLANK, cpu => { 1003 Console.WriteLine(); 1004 }); 1005 AddNative("eqstr", false, new SType(2, 1), cpu => { 1006 string s2 = cpu.Pop().ToString(); 1007 string s1 = cpu.Pop().ToString(); 1008 cpu.Push(s1 == s2); 1009 }); 1010 } 1011 1012 WordNative AddNative(string name, bool immediate, 1013 WordNative.NativeRun code) 1014 { 1015 return AddNative(name, immediate, SType.UNKNOWN, code); 1016 } 1017 1018 WordNative AddNative(string name, bool immediate, SType stackEffect, 1019 WordNative.NativeRun code) 1020 { 1021 if (words.ContainsKey(name)) { 1022 throw new Exception( 1023 "Word already defined: " + name); 1024 } 1025 WordNative w = new WordNative(this, name, code); 1026 w.Immediate = immediate; 1027 w.StackEffect = stackEffect; 1028 words[name] = w; 1029 return w; 1030 } 1031 1032 internal long NextBlobID() 1033 { 1034 return currentBlobID ++; 1035 } 1036 1037 int NextChar() 1038 { 1039 int c = delayedChar; 1040 if (c >= 0) { 1041 delayedChar = Int32.MinValue; 1042 } else if (c > Int32.MinValue) { 1043 delayedChar = -(c + 1); 1044 c = '\n'; 1045 } else { 1046 c = currentInput.Read(); 1047 } 1048 if (c == '\r') { 1049 if (delayedChar >= 0) { 1050 c = delayedChar; 1051 delayedChar = Int32.MinValue; 1052 } else { 1053 c = currentInput.Read(); 1054 } 1055 if (c != '\n') { 1056 delayedChar = c; 1057 c = '\n'; 1058 } 1059 } 1060 return c; 1061 } 1062 1063 /* 1064 * Un-read the character value 'c'. That value MUST be the one 1065 * that was obtained from NextChar(). 1066 */ 1067 void Unread(int c) 1068 { 1069 if (c < 0) { 1070 return; 1071 } 1072 if (delayedChar < 0) { 1073 if (delayedChar != Int32.MinValue) { 1074 throw new Exception( 1075 "Already two delayed characters"); 1076 } 1077 delayedChar = c; 1078 } else if (c != '\n') { 1079 throw new Exception("Cannot delay two characters"); 1080 } else { 1081 delayedChar = -(delayedChar + 1); 1082 } 1083 } 1084 1085 string Next() 1086 { 1087 string r = delayedToken; 1088 if (r != null) { 1089 delayedToken = null; 1090 return r; 1091 } 1092 tokenBuilder.Length = 0; 1093 int c; 1094 for (;;) { 1095 c = NextChar(); 1096 if (c < 0) { 1097 return null; 1098 } 1099 if (!IsWS(c)) { 1100 break; 1101 } 1102 } 1103 if (c == '"') { 1104 return ParseString(); 1105 } 1106 for (;;) { 1107 tokenBuilder.Append((char)c); 1108 c = NextChar(); 1109 if (c < 0 || IsWS(c)) { 1110 Unread(c); 1111 return tokenBuilder.ToString(); 1112 } 1113 } 1114 } 1115 1116 string ParseCCode() 1117 { 1118 SType stackEffect; 1119 string r = ParseCCode(out stackEffect); 1120 if (stackEffect.IsKnown) { 1121 throw new Exception( 1122 "Stack effect forbidden in this declaration"); 1123 } 1124 return r; 1125 } 1126 1127 string ParseCCode(out SType stackEffect) 1128 { 1129 string s = ParseCCodeNF(out stackEffect); 1130 if (s == null) { 1131 throw new Exception("Error while parsing C code"); 1132 } 1133 return s; 1134 } 1135 1136 string ParseCCodeNF(out SType stackEffect) 1137 { 1138 stackEffect = SType.UNKNOWN; 1139 for (;;) { 1140 int c = NextChar(); 1141 if (c < 0) { 1142 return null; 1143 } 1144 if (!IsWS(c)) { 1145 if (c == '(') { 1146 if (stackEffect.IsKnown) { 1147 Unread(c); 1148 return null; 1149 } 1150 stackEffect = ParseStackEffectNF(); 1151 if (!stackEffect.IsKnown) { 1152 return null; 1153 } 1154 continue; 1155 } else if (c != '{') { 1156 Unread(c); 1157 return null; 1158 } 1159 break; 1160 } 1161 } 1162 StringBuilder sb = new StringBuilder(); 1163 int count = 1; 1164 for (;;) { 1165 int c = NextChar(); 1166 if (c < 0) { 1167 return null; 1168 } 1169 switch (c) { 1170 case '{': 1171 count ++; 1172 break; 1173 case '}': 1174 if (-- count == 0) { 1175 return sb.ToString(); 1176 } 1177 break; 1178 } 1179 sb.Append((char)c); 1180 } 1181 } 1182 1183 /* 1184 * Parse a stack effect declaration. This method assumes that the 1185 * opening parenthesis has just been read. If the parsing fails, 1186 * then this method returns SType.UNKNOWN. 1187 */ 1188 SType ParseStackEffectNF() 1189 { 1190 bool seenSep = false; 1191 bool seenBang = false; 1192 int din = 0, dout = 0; 1193 for (;;) { 1194 string t = Next(); 1195 if (t == null) { 1196 return SType.UNKNOWN; 1197 } 1198 if (t == "--") { 1199 if (seenSep) { 1200 return SType.UNKNOWN; 1201 } 1202 seenSep = true; 1203 } else if (t == ")") { 1204 if (seenSep) { 1205 if (seenBang && dout == 1) { 1206 dout = -1; 1207 } 1208 return new SType(din, dout); 1209 } else { 1210 return SType.UNKNOWN; 1211 } 1212 } else { 1213 if (seenSep) { 1214 if (dout == 0 && t == "!") { 1215 seenBang = true; 1216 } 1217 dout ++; 1218 } else { 1219 din ++; 1220 } 1221 } 1222 } 1223 } 1224 1225 string ParseString() 1226 { 1227 StringBuilder sb = new StringBuilder(); 1228 sb.Append('"'); 1229 bool lcwb = false; 1230 int hexNum = 0; 1231 int acc = 0; 1232 for (;;) { 1233 int c = NextChar(); 1234 if (c < 0) { 1235 throw new Exception( 1236 "Unfinished literal string"); 1237 } 1238 if (hexNum > 0) { 1239 int d = HexVal(c); 1240 if (d < 0) { 1241 throw new Exception(String.Format( 1242 "not an hex digit: U+{0:X4}", 1243 c)); 1244 } 1245 acc = (acc << 4) + d; 1246 if (-- hexNum == 0) { 1247 sb.Append((char)acc); 1248 acc = 0; 1249 } 1250 } else if (lcwb) { 1251 lcwb = false; 1252 switch (c) { 1253 case '\n': SkipNL(); break; 1254 case 'x': 1255 hexNum = 2; 1256 break; 1257 case 'u': 1258 hexNum = 4; 1259 break; 1260 default: 1261 sb.Append(SingleCharEscape(c)); 1262 break; 1263 } 1264 } else { 1265 switch (c) { 1266 case '"': 1267 return sb.ToString(); 1268 case '\\': 1269 lcwb = true; 1270 break; 1271 default: 1272 sb.Append((char)c); 1273 break; 1274 } 1275 } 1276 } 1277 } 1278 1279 static char SingleCharEscape(int c) 1280 { 1281 switch (c) { 1282 case 'n': return '\n'; 1283 case 'r': return '\r'; 1284 case 't': return '\t'; 1285 case 's': return ' '; 1286 default: 1287 return (char)c; 1288 } 1289 } 1290 1291 /* 1292 * A backslash+newline sequence occurred in a literal string; we 1293 * check and consume the newline escape sequence (whitespace at 1294 * start of next line, then a double-quote character). 1295 */ 1296 void SkipNL() 1297 { 1298 for (;;) { 1299 int c = NextChar(); 1300 if (c < 0) { 1301 throw new Exception("EOF in literal string"); 1302 } 1303 if (c == '\n') { 1304 throw new Exception( 1305 "Unescaped newline in literal string"); 1306 } 1307 if (IsWS(c)) { 1308 continue; 1309 } 1310 if (c == '"') { 1311 return; 1312 } 1313 throw new Exception( 1314 "Invalid newline escape in literal string"); 1315 } 1316 } 1317 1318 static char DecodeCharConst(string t) 1319 { 1320 if (t.Length == 1 && t[0] != '\\') { 1321 return t[0]; 1322 } 1323 if (t.Length >= 2 && t[0] == '\\') { 1324 switch (t[1]) { 1325 case 'x': 1326 if (t.Length == 4) { 1327 int x = DecHex(t.Substring(2)); 1328 if (x >= 0) { 1329 return (char)x; 1330 } 1331 } 1332 break; 1333 case 'u': 1334 if (t.Length == 6) { 1335 int x = DecHex(t.Substring(2)); 1336 if (x >= 0) { 1337 return (char)x; 1338 } 1339 } 1340 break; 1341 default: 1342 if (t.Length == 2) { 1343 return SingleCharEscape(t[1]); 1344 } 1345 break; 1346 } 1347 } 1348 throw new Exception("Invalid literal char: `" + t); 1349 } 1350 1351 static int DecHex(string s) 1352 { 1353 int acc = 0; 1354 foreach (char c in s) { 1355 int d = HexVal(c); 1356 if (d < 0) { 1357 return -1; 1358 } 1359 acc = (acc << 4) + d; 1360 } 1361 return acc; 1362 } 1363 1364 static int HexVal(int c) 1365 { 1366 if (c >= '0' && c <= '9') { 1367 return c - '0'; 1368 } else if (c >= 'A' && c <= 'F') { 1369 return c - ('A' - 10); 1370 } else if (c >= 'a' && c <= 'f') { 1371 return c - ('a' - 10); 1372 } else { 1373 return -1; 1374 } 1375 } 1376 1377 string ReadTerm(int ct) 1378 { 1379 StringBuilder sb = new StringBuilder(); 1380 for (;;) { 1381 int c = NextChar(); 1382 if (c < 0) { 1383 throw new Exception(String.Format( 1384 "EOF reached before U+{0:X4}", ct)); 1385 } 1386 if (c == ct) { 1387 return sb.ToString(); 1388 } 1389 sb.Append((char)c); 1390 } 1391 } 1392 1393 static bool IsWS(int c) 1394 { 1395 return c <= 32; 1396 } 1397 1398 void ProcessInput(TextReader tr) 1399 { 1400 this.currentInput = tr; 1401 delayedChar = -1; 1402 Word w = new WordNative(this, "toplevel", 1403 xcpu => { CompileStep(xcpu); }); 1404 CPU cpu = new CPU(); 1405 Opcode[] code = new Opcode[] { 1406 new OpcodeCall(w), 1407 new OpcodeJumpUncond(-2) 1408 }; 1409 quitRunLoop = false; 1410 cpu.Enter(code, 0); 1411 for (;;) { 1412 if (quitRunLoop) { 1413 break; 1414 } 1415 Opcode op = cpu.ipBuf[cpu.ipOff ++]; 1416 op.Run(cpu); 1417 } 1418 } 1419 1420 void CompileStep(CPU cpu) 1421 { 1422 string tt = Next(); 1423 if (tt == null) { 1424 if (compiling) { 1425 throw new Exception("EOF while compiling"); 1426 } 1427 quitRunLoop = true; 1428 return; 1429 } 1430 TValue v; 1431 bool isVal = TryParseLiteral(tt, out v); 1432 Word w = LookupNF(tt); 1433 if (isVal && w != null) { 1434 throw new Exception(String.Format( 1435 "Ambiguous: both defined word and literal: {0}", 1436 tt)); 1437 } 1438 if (compiling) { 1439 if (isVal) { 1440 wordBuilder.Literal(v); 1441 } else if (w != null) { 1442 if (w.Immediate) { 1443 w.Run(cpu); 1444 } else { 1445 wordBuilder.CallExt(w); 1446 } 1447 } else { 1448 wordBuilder.Call(tt); 1449 } 1450 } else { 1451 if (isVal) { 1452 cpu.Push(v); 1453 } else if (w != null) { 1454 w.Run(cpu); 1455 } else { 1456 throw new Exception(String.Format( 1457 "Unknown word: '{0}'", tt)); 1458 } 1459 } 1460 } 1461 1462 string GetCCode(string name) 1463 { 1464 string ccode; 1465 allCCode.TryGetValue(name, out ccode); 1466 return ccode; 1467 } 1468 1469 void Generate(string outBase, string coreRun, 1470 params string[] entryPoints) 1471 { 1472 /* 1473 * Gather all words that are part of the generated 1474 * code. This is done by exploring references 1475 * transitively. All such words are thus implicitly 1476 * resolved. 1477 */ 1478 IDictionary<string, Word> wordSet = 1479 new SortedDictionary<string, Word>( 1480 StringComparer.Ordinal); 1481 Queue<Word> tx = new Queue<Word>(); 1482 foreach (string ep in entryPoints) { 1483 if (wordSet.ContainsKey(ep)) { 1484 continue; 1485 } 1486 Word w = Lookup(ep); 1487 wordSet[w.Name] = w; 1488 tx.Enqueue(w); 1489 } 1490 while (tx.Count > 0) { 1491 Word w = tx.Dequeue(); 1492 foreach (Word w2 in w.GetReferences()) { 1493 if (wordSet.ContainsKey(w2.Name)) { 1494 continue; 1495 } 1496 wordSet[w2.Name] = w2; 1497 tx.Enqueue(w2); 1498 } 1499 } 1500 1501 /* 1502 * Do flow analysis. 1503 */ 1504 if (enableFlowAnalysis) { 1505 foreach (string ep in entryPoints) { 1506 Word w = wordSet[ep]; 1507 w.AnalyseFlow(); 1508 Console.WriteLine("{0}: ds={1} rs={2}", 1509 ep, w.MaxDataStack, w.MaxReturnStack); 1510 if (w.MaxDataStack > dsLimit) { 1511 throw new Exception("'" + ep 1512 + "' exceeds data stack limit"); 1513 } 1514 if (w.MaxReturnStack > rsLimit) { 1515 throw new Exception("'" + ep 1516 + "' exceeds return stack" 1517 + " limit"); 1518 } 1519 } 1520 } 1521 1522 /* 1523 * Gather referenced data areas and compute their 1524 * addresses in the generated data block. The address 1525 * 0 in the data block is unaffected so that no 1526 * valid runtime pointer is equal to null. 1527 */ 1528 IDictionary<long, ConstData> blocks = 1529 new SortedDictionary<long, ConstData>(); 1530 foreach (Word w in wordSet.Values) { 1531 foreach (ConstData cd in w.GetDataBlocks()) { 1532 blocks[cd.ID] = cd; 1533 } 1534 } 1535 int dataLen = 1; 1536 foreach (ConstData cd in blocks.Values) { 1537 cd.Address = dataLen; 1538 dataLen += cd.Length; 1539 } 1540 1541 /* 1542 * Generated code is a sequence of "slot numbers", each 1543 * referencing either a piece of explicit C code, or an 1544 * entry in the table of interpreted words. 1545 * 1546 * Opcodes other than "call" get the slots 0 to 6: 1547 * 1548 * 0 ret no argument 1549 * 1 const signed value 1550 * 2 get local local number 1551 * 3 put local local number 1552 * 4 jump signed offset 1553 * 5 jump if signed offset 1554 * 6 jump if not signed offset 1555 * 1556 * The argument, if any, is in "7E" format: the value is 1557 * encoded in 7-bit chunk, with big-endian signed 1558 * convention. Each 7-bit chunk is encoded over one byte; 1559 * the upper bit is 1 for all chunks except the last one. 1560 * 1561 * Words with explicit C code get the slot numbers 1562 * immediately after 6. Interpreted words come afterwards. 1563 */ 1564 IDictionary<string, int> slots = new Dictionary<string, int>(); 1565 int curSlot = 7; 1566 1567 /* 1568 * Get explicit C code for words which have such code. 1569 * We use string equality on C code so that words with 1570 * identical implementations get merged. 1571 * 1572 * We also check that words with no explicit C code are 1573 * interpreted. 1574 */ 1575 IDictionary<string, int> ccodeUni = 1576 new Dictionary<string, int>(); 1577 IDictionary<int, string> ccodeNames = 1578 new Dictionary<int, string>(); 1579 foreach (Word w in wordSet.Values) { 1580 string ccode = GetCCode(w.Name); 1581 if (ccode == null) { 1582 if (w is WordNative) { 1583 throw new Exception(String.Format( 1584 "No C code for native '{0}'", 1585 w.Name)); 1586 } 1587 continue; 1588 } 1589 int sn; 1590 if (ccodeUni.ContainsKey(ccode)) { 1591 sn = ccodeUni[ccode]; 1592 ccodeNames[sn] += " " + EscapeCComment(w.Name); 1593 } else { 1594 sn = curSlot ++; 1595 ccodeUni[ccode] = sn; 1596 ccodeNames[sn] = EscapeCComment(w.Name); 1597 } 1598 slots[w.Name] = sn; 1599 w.Slot = sn; 1600 } 1601 1602 /* 1603 * Assign slot values to all remaining words; we know they 1604 * are all interpreted. 1605 */ 1606 int slotInterpreted = curSlot; 1607 foreach (Word w in wordSet.Values) { 1608 if (GetCCode(w.Name) != null) { 1609 continue; 1610 } 1611 int sn = curSlot ++; 1612 slots[w.Name] = sn; 1613 w.Slot = sn; 1614 } 1615 int numInterpreted = curSlot - slotInterpreted; 1616 1617 /* 1618 * Verify that all entry points are interpreted words. 1619 */ 1620 foreach (string ep in entryPoints) { 1621 if (GetCCode(ep) != null) { 1622 throw new Exception( 1623 "Non-interpreted entry point"); 1624 } 1625 } 1626 1627 /* 1628 * Compute the code block. Each word (without any C code) 1629 * yields some CodeElement instances. 1630 */ 1631 List<CodeElement> gcodeList = new List<CodeElement>(); 1632 CodeElement[] interpretedEntry = 1633 new CodeElement[numInterpreted]; 1634 foreach (Word w in wordSet.Values) { 1635 if (GetCCode(w.Name) != null) { 1636 continue; 1637 } 1638 int n = gcodeList.Count; 1639 w.GenerateCodeElements(gcodeList); 1640 interpretedEntry[w.Slot - slotInterpreted] = 1641 gcodeList[n]; 1642 } 1643 CodeElement[] gcode = gcodeList.ToArray(); 1644 1645 /* 1646 * If there are less than 256 words in total (C + 1647 * interpreted) then we can use "one-byte code" which is 1648 * more compact when the number of words is in the 1649 * 128..255 range. 1650 */ 1651 bool oneByteCode; 1652 if (slotInterpreted + numInterpreted >= 256) { 1653 Console.WriteLine("WARNING: more than 255 words"); 1654 oneByteCode = false; 1655 } else { 1656 oneByteCode = true; 1657 } 1658 1659 /* 1660 * Compute all addresses and offsets. This loops until 1661 * the addresses stabilize. 1662 */ 1663 int totalLen = -1; 1664 int[] gcodeLen = new int[gcode.Length]; 1665 for (;;) { 1666 for (int i = 0; i < gcode.Length; i ++) { 1667 gcodeLen[i] = gcode[i].GetLength(oneByteCode); 1668 } 1669 int off = 0; 1670 for (int i = 0; i < gcode.Length; i ++) { 1671 gcode[i].Address = off; 1672 gcode[i].LastLength = gcodeLen[i]; 1673 off += gcodeLen[i]; 1674 } 1675 if (off == totalLen) { 1676 break; 1677 } 1678 totalLen = off; 1679 } 1680 1681 /* 1682 * Produce output file. 1683 */ 1684 using (TextWriter tw = File.CreateText(outBase + ".c")) { 1685 tw.NewLine = "\n"; 1686 1687 tw.WriteLine("{0}", 1688 @"/* Automatically generated code; do not modify directly. */ 1689 1690 #include <stddef.h> 1691 #include <stdint.h> 1692 1693 typedef struct { 1694 uint32_t *dp; 1695 uint32_t *rp; 1696 const unsigned char *ip; 1697 } t0_context; 1698 1699 static uint32_t 1700 t0_parse7E_unsigned(const unsigned char **p) 1701 { 1702 uint32_t x; 1703 1704 x = 0; 1705 for (;;) { 1706 unsigned y; 1707 1708 y = *(*p) ++; 1709 x = (x << 7) | (uint32_t)(y & 0x7F); 1710 if (y < 0x80) { 1711 return x; 1712 } 1713 } 1714 } 1715 1716 static int32_t 1717 t0_parse7E_signed(const unsigned char **p) 1718 { 1719 int neg; 1720 uint32_t x; 1721 1722 neg = ((**p) >> 6) & 1; 1723 x = (uint32_t)-neg; 1724 for (;;) { 1725 unsigned y; 1726 1727 y = *(*p) ++; 1728 x = (x << 7) | (uint32_t)(y & 0x7F); 1729 if (y < 0x80) { 1730 if (neg) { 1731 return -(int32_t)~x - 1; 1732 } else { 1733 return (int32_t)x; 1734 } 1735 } 1736 } 1737 } 1738 1739 #define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) 1740 #define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) 1741 #define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) 1742 #define T0_INT1(x) T0_FBYTE(x, 0) 1743 #define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0) 1744 #define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) 1745 #define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) 1746 #define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) 1747 1748 /* static const unsigned char t0_datablock[]; */ 1749 "); 1750 1751 /* 1752 * Add declarations (not definitions) for the 1753 * entry point initialisation functions, and the 1754 * runner. 1755 */ 1756 tw.WriteLine(); 1757 foreach (string ep in entryPoints) { 1758 tw.WriteLine("void {0}_init_{1}(void *t0ctx);", 1759 coreRun, ep); 1760 } 1761 tw.WriteLine(); 1762 tw.WriteLine("void {0}_run(void *t0ctx);", coreRun); 1763 1764 /* 1765 * Add preamble elements here. They may be needed 1766 * for evaluating constant expressions in the 1767 * code block. 1768 */ 1769 foreach (string pp in extraCode) { 1770 tw.WriteLine(); 1771 tw.WriteLine("{0}", pp); 1772 } 1773 1774 BlobWriter bw; 1775 tw.WriteLine(); 1776 tw.Write("static const unsigned char" 1777 + " t0_datablock[] = {"); 1778 bw = new BlobWriter(tw, 78, 1); 1779 bw.Append((byte)0); 1780 foreach (ConstData cd in blocks.Values) { 1781 cd.Encode(bw); 1782 } 1783 tw.WriteLine(); 1784 tw.WriteLine("};"); 1785 1786 tw.WriteLine(); 1787 tw.Write("static const unsigned char" 1788 + " t0_codeblock[] = {"); 1789 bw = new BlobWriter(tw, 78, 1); 1790 foreach (CodeElement ce in gcode) { 1791 ce.Encode(bw, oneByteCode); 1792 } 1793 tw.WriteLine(); 1794 tw.WriteLine("};"); 1795 1796 tw.WriteLine(); 1797 tw.Write("static const uint16_t t0_caddr[] = {"); 1798 for (int i = 0; i < interpretedEntry.Length; i ++) { 1799 if (i != 0) { 1800 tw.Write(','); 1801 } 1802 tw.WriteLine(); 1803 tw.Write("\t{0}", interpretedEntry[i].Address); 1804 } 1805 tw.WriteLine(); 1806 tw.WriteLine("};"); 1807 1808 tw.WriteLine(); 1809 tw.WriteLine("#define T0_INTERPRETED {0}", 1810 slotInterpreted); 1811 tw.WriteLine(); 1812 tw.WriteLine("{0}", 1813 @"#define T0_ENTER(ip, rp, slot) do { \ 1814 const unsigned char *t0_newip; \ 1815 uint32_t t0_lnum; \ 1816 t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \ 1817 t0_lnum = t0_parse7E_unsigned(&t0_newip); \ 1818 (rp) += t0_lnum; \ 1819 *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \ 1820 (ip) = t0_newip; \ 1821 } while (0)"); 1822 tw.WriteLine(); 1823 tw.WriteLine("{0}", 1824 @"#define T0_DEFENTRY(name, slot) \ 1825 void \ 1826 name(void *ctx) \ 1827 { \ 1828 t0_context *t0ctx = ctx; \ 1829 t0ctx->ip = &t0_codeblock[0]; \ 1830 T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \ 1831 }"); 1832 1833 tw.WriteLine(); 1834 foreach (string ep in entryPoints) { 1835 tw.WriteLine("T0_DEFENTRY({0}, {1})", 1836 coreRun + "_init_" + ep, 1837 wordSet[ep].Slot); 1838 } 1839 tw.WriteLine(); 1840 if (oneByteCode) { 1841 tw.WriteLine("{0}", 1842 @"#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++)"); 1843 } else { 1844 tw.WriteLine("{0}", 1845 @"#define T0_NEXT(t0ipp) t0_parse7E_unsigned(t0ipp)"); 1846 } 1847 tw.WriteLine(); 1848 tw.WriteLine("void"); 1849 tw.WriteLine("{0}_run(void *t0ctx)", coreRun); 1850 tw.WriteLine("{0}", 1851 @"{ 1852 uint32_t *dp, *rp; 1853 const unsigned char *ip; 1854 1855 #define T0_LOCAL(x) (*(rp - 2 - (x))) 1856 #define T0_POP() (*-- dp) 1857 #define T0_POPi() (*(int32_t *)(-- dp)) 1858 #define T0_PEEK(x) (*(dp - 1 - (x))) 1859 #define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x))) 1860 #define T0_PUSH(v) do { *dp = (v); dp ++; } while (0) 1861 #define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0) 1862 #define T0_RPOP() (*-- rp) 1863 #define T0_RPOPi() (*(int32_t *)(-- rp)) 1864 #define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0) 1865 #define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0) 1866 #define T0_ROLL(x) do { \ 1867 size_t t0len = (size_t)(x); \ 1868 uint32_t t0tmp = *(dp - 1 - t0len); \ 1869 memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \ 1870 *(dp - 1) = t0tmp; \ 1871 } while (0) 1872 #define T0_SWAP() do { \ 1873 uint32_t t0tmp = *(dp - 2); \ 1874 *(dp - 2) = *(dp - 1); \ 1875 *(dp - 1) = t0tmp; \ 1876 } while (0) 1877 #define T0_ROT() do { \ 1878 uint32_t t0tmp = *(dp - 3); \ 1879 *(dp - 3) = *(dp - 2); \ 1880 *(dp - 2) = *(dp - 1); \ 1881 *(dp - 1) = t0tmp; \ 1882 } while (0) 1883 #define T0_NROT() do { \ 1884 uint32_t t0tmp = *(dp - 1); \ 1885 *(dp - 1) = *(dp - 2); \ 1886 *(dp - 2) = *(dp - 3); \ 1887 *(dp - 3) = t0tmp; \ 1888 } while (0) 1889 #define T0_PICK(x) do { \ 1890 uint32_t t0depth = (x); \ 1891 T0_PUSH(T0_PEEK(t0depth)); \ 1892 } while (0) 1893 #define T0_CO() do { \ 1894 goto t0_exit; \ 1895 } while (0) 1896 #define T0_RET() goto t0_next 1897 1898 dp = ((t0_context *)t0ctx)->dp; 1899 rp = ((t0_context *)t0ctx)->rp; 1900 ip = ((t0_context *)t0ctx)->ip; 1901 goto t0_next; 1902 for (;;) { 1903 uint32_t t0x; 1904 1905 t0_next: 1906 t0x = T0_NEXT(&ip); 1907 if (t0x < T0_INTERPRETED) { 1908 switch (t0x) { 1909 int32_t t0off; 1910 1911 case 0: /* ret */ 1912 t0x = T0_RPOP(); 1913 rp -= (t0x >> 16); 1914 t0x &= 0xFFFF; 1915 if (t0x == 0) { 1916 ip = NULL; 1917 goto t0_exit; 1918 } 1919 ip = &t0_codeblock[t0x]; 1920 break; 1921 case 1: /* literal constant */ 1922 T0_PUSHi(t0_parse7E_signed(&ip)); 1923 break; 1924 case 2: /* read local */ 1925 T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip))); 1926 break; 1927 case 3: /* write local */ 1928 T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP(); 1929 break; 1930 case 4: /* jump */ 1931 t0off = t0_parse7E_signed(&ip); 1932 ip += t0off; 1933 break; 1934 case 5: /* jump if */ 1935 t0off = t0_parse7E_signed(&ip); 1936 if (T0_POP()) { 1937 ip += t0off; 1938 } 1939 break; 1940 case 6: /* jump if not */ 1941 t0off = t0_parse7E_signed(&ip); 1942 if (!T0_POP()) { 1943 ip += t0off; 1944 } 1945 break;"); 1946 1947 SortedDictionary<int, string> nccode = 1948 new SortedDictionary<int, string>(); 1949 foreach (string k in ccodeUni.Keys) { 1950 nccode[ccodeUni[k]] = k; 1951 } 1952 foreach (int sn in nccode.Keys) { 1953 tw.WriteLine( 1954 @" case {0}: {{ 1955 /* {1} */ 1956 {2} 1957 }} 1958 break;", sn, ccodeNames[sn], nccode[sn]); 1959 } 1960 1961 tw.WriteLine( 1962 @" } 1963 1964 } else { 1965 T0_ENTER(ip, rp, t0x); 1966 } 1967 } 1968 t0_exit: 1969 ((t0_context *)t0ctx)->dp = dp; 1970 ((t0_context *)t0ctx)->rp = rp; 1971 ((t0_context *)t0ctx)->ip = ip; 1972 }"); 1973 1974 /* 1975 * Add the "postamblr" elements here. These are 1976 * elements that may need access to the data 1977 * block or code block, so they must occur after 1978 * their definition. 1979 */ 1980 foreach (string pp in extraCodeDefer) { 1981 tw.WriteLine(); 1982 tw.WriteLine("{0}", pp); 1983 } 1984 } 1985 1986 int codeLen = 0; 1987 foreach (CodeElement ce in gcode) { 1988 codeLen += ce.GetLength(oneByteCode); 1989 } 1990 int dataBlockLen = 0; 1991 foreach (ConstData cd in blocks.Values) { 1992 dataBlockLen += cd.Length; 1993 } 1994 1995 /* 1996 * Write some statistics on produced code. 1997 */ 1998 Console.WriteLine("code length: {0,6} byte(s)", codeLen); 1999 Console.WriteLine("data length: {0,6} byte(s)", dataLen); 2000 Console.WriteLine("total words: {0} (interpreted: {1})", 2001 slotInterpreted + numInterpreted, numInterpreted); 2002 } 2003 2004 internal Word Lookup(string name) 2005 { 2006 Word w = LookupNF(name); 2007 if (w != null) { 2008 return w; 2009 } 2010 throw new Exception(String.Format("No such word: '{0}'", name)); 2011 } 2012 2013 internal Word LookupNF(string name) 2014 { 2015 Word w; 2016 words.TryGetValue(name, out w); 2017 return w; 2018 } 2019 2020 internal TValue StringToBlob(string s) 2021 { 2022 return new TValue(0, new TPointerBlob(this, s)); 2023 } 2024 2025 internal bool TryParseLiteral(string tt, out TValue tv) 2026 { 2027 tv = new TValue(0); 2028 if (tt.StartsWith("\"")) { 2029 tv = StringToBlob(tt.Substring(1)); 2030 return true; 2031 } 2032 if (tt.StartsWith("`")) { 2033 tv = DecodeCharConst(tt.Substring(1)); 2034 return true; 2035 } 2036 bool neg = false; 2037 if (tt.StartsWith("-")) { 2038 neg = true; 2039 tt = tt.Substring(1); 2040 } else if (tt.StartsWith("+")) { 2041 tt = tt.Substring(1); 2042 } 2043 uint radix = 10; 2044 if (tt.StartsWith("0x") || tt.StartsWith("0X")) { 2045 radix = 16; 2046 tt = tt.Substring(2); 2047 } else if (tt.StartsWith("0b") || tt.StartsWith("0B")) { 2048 radix = 2; 2049 tt = tt.Substring(2); 2050 } 2051 if (tt.Length == 0) { 2052 return false; 2053 } 2054 uint acc = 0; 2055 bool overflow = false; 2056 uint maxV = uint.MaxValue / radix; 2057 foreach (char c in tt) { 2058 int d = HexVal(c); 2059 if (d < 0 || d >= radix) { 2060 return false; 2061 } 2062 if (acc > maxV) { 2063 overflow = true; 2064 } 2065 acc *= radix; 2066 if ((uint)d > uint.MaxValue - acc) { 2067 overflow = true; 2068 } 2069 acc += (uint)d; 2070 } 2071 int x = (int)acc; 2072 if (neg) { 2073 if (acc > (uint)0x80000000) { 2074 overflow = true; 2075 } 2076 x = -x; 2077 } 2078 if (overflow) { 2079 throw new Exception( 2080 "invalid literal integer (overflow)"); 2081 } 2082 tv = x; 2083 return true; 2084 } 2085 2086 int ParseInteger(string tt) 2087 { 2088 TValue tv; 2089 if (!TryParseLiteral(tt, out tv)) { 2090 throw new Exception("not an integer: " + ToString()); 2091 } 2092 return (int)tv; 2093 } 2094 2095 void CheckCompiling() 2096 { 2097 if (!compiling) { 2098 throw new Exception("Not in compilation mode"); 2099 } 2100 } 2101 2102 static string EscapeCComment(string s) 2103 { 2104 StringBuilder sb = new StringBuilder(); 2105 foreach (char c in s) { 2106 if (c >= 33 && c <= 126 && c != '%') { 2107 sb.Append(c); 2108 } else if (c < 0x100) { 2109 sb.AppendFormat("%{0:X2}", (int)c); 2110 } else if (c < 0x800) { 2111 sb.AppendFormat("%{0:X2}%{0:X2}", 2112 ((int)c >> 6) | 0xC0, 2113 ((int)c & 0x3F) | 0x80); 2114 } else { 2115 sb.AppendFormat("%{0:X2}%{0:X2}%{0:X2}", 2116 ((int)c >> 12) | 0xE0, 2117 (((int)c >> 6) & 0x3F) | 0x80, 2118 ((int)c & 0x3F) | 0x80); 2119 } 2120 } 2121 return sb.ToString().Replace("*/", "%2A/"); 2122 } 2123 } 2124