10b57cec5SDimitry Andric //===- YAMLParser.cpp - Simple YAML parser --------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements a YAML parser. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Support/YAMLParser.h" 140b57cec5SDimitry Andric #include "llvm/ADT/AllocatorList.h" 150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 190b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 200b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 210b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 220b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 230b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 240b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 250b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h" 260b57cec5SDimitry Andric #include "llvm/Support/SourceMgr.h" 270b57cec5SDimitry Andric #include "llvm/Support/Unicode.h" 280b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 290b57cec5SDimitry Andric #include <cassert> 300b57cec5SDimitry Andric #include <cstddef> 310b57cec5SDimitry Andric #include <cstdint> 320b57cec5SDimitry Andric #include <map> 330b57cec5SDimitry Andric #include <memory> 340b57cec5SDimitry Andric #include <string> 350b57cec5SDimitry Andric #include <system_error> 360b57cec5SDimitry Andric #include <utility> 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric using namespace llvm; 390b57cec5SDimitry Andric using namespace yaml; 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric enum UnicodeEncodingForm { 420b57cec5SDimitry Andric UEF_UTF32_LE, ///< UTF-32 Little Endian 430b57cec5SDimitry Andric UEF_UTF32_BE, ///< UTF-32 Big Endian 440b57cec5SDimitry Andric UEF_UTF16_LE, ///< UTF-16 Little Endian 450b57cec5SDimitry Andric UEF_UTF16_BE, ///< UTF-16 Big Endian 460b57cec5SDimitry Andric UEF_UTF8, ///< UTF-8 or ascii. 470b57cec5SDimitry Andric UEF_Unknown ///< Not a valid Unicode encoding. 480b57cec5SDimitry Andric }; 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric /// EncodingInfo - Holds the encoding type and length of the byte order mark if 510b57cec5SDimitry Andric /// it exists. Length is in {0, 2, 3, 4}. 520b57cec5SDimitry Andric using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode 550b57cec5SDimitry Andric /// encoding form of \a Input. 560b57cec5SDimitry Andric /// 570b57cec5SDimitry Andric /// @param Input A string of length 0 or more. 580b57cec5SDimitry Andric /// @returns An EncodingInfo indicating the Unicode encoding form of the input 590b57cec5SDimitry Andric /// and how long the byte order mark is if one exists. 600b57cec5SDimitry Andric static EncodingInfo getUnicodeEncoding(StringRef Input) { 610b57cec5SDimitry Andric if (Input.empty()) 620b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0); 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric switch (uint8_t(Input[0])) { 650b57cec5SDimitry Andric case 0x00: 660b57cec5SDimitry Andric if (Input.size() >= 4) { 670b57cec5SDimitry Andric if ( Input[1] == 0 680b57cec5SDimitry Andric && uint8_t(Input[2]) == 0xFE 690b57cec5SDimitry Andric && uint8_t(Input[3]) == 0xFF) 700b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_BE, 4); 710b57cec5SDimitry Andric if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) 720b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_BE, 0); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric if (Input.size() >= 2 && Input[1] != 0) 760b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_BE, 0); 770b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0); 780b57cec5SDimitry Andric case 0xFF: 790b57cec5SDimitry Andric if ( Input.size() >= 4 800b57cec5SDimitry Andric && uint8_t(Input[1]) == 0xFE 810b57cec5SDimitry Andric && Input[2] == 0 820b57cec5SDimitry Andric && Input[3] == 0) 830b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_LE, 4); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) 860b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_LE, 2); 870b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0); 880b57cec5SDimitry Andric case 0xFE: 890b57cec5SDimitry Andric if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) 900b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_BE, 2); 910b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0); 920b57cec5SDimitry Andric case 0xEF: 930b57cec5SDimitry Andric if ( Input.size() >= 3 940b57cec5SDimitry Andric && uint8_t(Input[1]) == 0xBB 950b57cec5SDimitry Andric && uint8_t(Input[2]) == 0xBF) 960b57cec5SDimitry Andric return std::make_pair(UEF_UTF8, 3); 970b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric // It could still be utf-32 or utf-16. 1010b57cec5SDimitry Andric if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) 1020b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_LE, 0); 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric if (Input.size() >= 2 && Input[1] == 0) 1050b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_LE, 0); 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric return std::make_pair(UEF_UTF8, 0); 1080b57cec5SDimitry Andric } 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric /// Pin the vtables to this file. 1110b57cec5SDimitry Andric void Node::anchor() {} 1120b57cec5SDimitry Andric void NullNode::anchor() {} 1130b57cec5SDimitry Andric void ScalarNode::anchor() {} 1140b57cec5SDimitry Andric void BlockScalarNode::anchor() {} 1150b57cec5SDimitry Andric void KeyValueNode::anchor() {} 1160b57cec5SDimitry Andric void MappingNode::anchor() {} 1170b57cec5SDimitry Andric void SequenceNode::anchor() {} 1180b57cec5SDimitry Andric void AliasNode::anchor() {} 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric namespace llvm { 1210b57cec5SDimitry Andric namespace yaml { 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric /// Token - A single YAML token. 1240b57cec5SDimitry Andric struct Token { 1250b57cec5SDimitry Andric enum TokenKind { 1260b57cec5SDimitry Andric TK_Error, // Uninitialized token. 1270b57cec5SDimitry Andric TK_StreamStart, 1280b57cec5SDimitry Andric TK_StreamEnd, 1290b57cec5SDimitry Andric TK_VersionDirective, 1300b57cec5SDimitry Andric TK_TagDirective, 1310b57cec5SDimitry Andric TK_DocumentStart, 1320b57cec5SDimitry Andric TK_DocumentEnd, 1330b57cec5SDimitry Andric TK_BlockEntry, 1340b57cec5SDimitry Andric TK_BlockEnd, 1350b57cec5SDimitry Andric TK_BlockSequenceStart, 1360b57cec5SDimitry Andric TK_BlockMappingStart, 1370b57cec5SDimitry Andric TK_FlowEntry, 1380b57cec5SDimitry Andric TK_FlowSequenceStart, 1390b57cec5SDimitry Andric TK_FlowSequenceEnd, 1400b57cec5SDimitry Andric TK_FlowMappingStart, 1410b57cec5SDimitry Andric TK_FlowMappingEnd, 1420b57cec5SDimitry Andric TK_Key, 1430b57cec5SDimitry Andric TK_Value, 1440b57cec5SDimitry Andric TK_Scalar, 1450b57cec5SDimitry Andric TK_BlockScalar, 1460b57cec5SDimitry Andric TK_Alias, 1470b57cec5SDimitry Andric TK_Anchor, 1480b57cec5SDimitry Andric TK_Tag 1490b57cec5SDimitry Andric } Kind = TK_Error; 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric /// A string of length 0 or more whose begin() points to the logical location 1520b57cec5SDimitry Andric /// of the token in the input. 1530b57cec5SDimitry Andric StringRef Range; 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric /// The value of a block scalar node. 1560b57cec5SDimitry Andric std::string Value; 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric Token() = default; 1590b57cec5SDimitry Andric }; 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric } // end namespace yaml 1620b57cec5SDimitry Andric } // end namespace llvm 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric using TokenQueueT = BumpPtrList<Token>; 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric namespace { 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric /// This struct is used to track simple keys. 1690b57cec5SDimitry Andric /// 1700b57cec5SDimitry Andric /// Simple keys are handled by creating an entry in SimpleKeys for each Token 1710b57cec5SDimitry Andric /// which could legally be the start of a simple key. When peekNext is called, 1720b57cec5SDimitry Andric /// if the Token To be returned is referenced by a SimpleKey, we continue 1730b57cec5SDimitry Andric /// tokenizing until that potential simple key has either been found to not be 1740b57cec5SDimitry Andric /// a simple key (we moved on to the next line or went further than 1024 chars). 1750b57cec5SDimitry Andric /// Or when we run into a Value, and then insert a Key token (and possibly 1760b57cec5SDimitry Andric /// others) before the SimpleKey's Tok. 1770b57cec5SDimitry Andric struct SimpleKey { 1780b57cec5SDimitry Andric TokenQueueT::iterator Tok; 179480093f4SDimitry Andric unsigned Column = 0; 180480093f4SDimitry Andric unsigned Line = 0; 181480093f4SDimitry Andric unsigned FlowLevel = 0; 182480093f4SDimitry Andric bool IsRequired = false; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric bool operator ==(const SimpleKey &Other) { 1850b57cec5SDimitry Andric return Tok == Other.Tok; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric }; 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric } // end anonymous namespace 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric /// The Unicode scalar value of a UTF-8 minimal well-formed code unit 1920b57cec5SDimitry Andric /// subsequence and the subsequence's length in code units (uint8_t). 1930b57cec5SDimitry Andric /// A length of 0 represents an error. 1940b57cec5SDimitry Andric using UTF8Decoded = std::pair<uint32_t, unsigned>; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric static UTF8Decoded decodeUTF8(StringRef Range) { 1970b57cec5SDimitry Andric StringRef::iterator Position= Range.begin(); 1980b57cec5SDimitry Andric StringRef::iterator End = Range.end(); 1990b57cec5SDimitry Andric // 1 byte: [0x00, 0x7f] 2000b57cec5SDimitry Andric // Bit pattern: 0xxxxxxx 201e8d8bef9SDimitry Andric if (Position < End && (*Position & 0x80) == 0) { 2020b57cec5SDimitry Andric return std::make_pair(*Position, 1); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric // 2 bytes: [0x80, 0x7ff] 2050b57cec5SDimitry Andric // Bit pattern: 110xxxxx 10xxxxxx 206e8d8bef9SDimitry Andric if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) && 2070b57cec5SDimitry Andric ((*(Position + 1) & 0xC0) == 0x80)) { 2080b57cec5SDimitry Andric uint32_t codepoint = ((*Position & 0x1F) << 6) | 2090b57cec5SDimitry Andric (*(Position + 1) & 0x3F); 2100b57cec5SDimitry Andric if (codepoint >= 0x80) 2110b57cec5SDimitry Andric return std::make_pair(codepoint, 2); 2120b57cec5SDimitry Andric } 2130b57cec5SDimitry Andric // 3 bytes: [0x8000, 0xffff] 2140b57cec5SDimitry Andric // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx 215e8d8bef9SDimitry Andric if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) && 2160b57cec5SDimitry Andric ((*(Position + 1) & 0xC0) == 0x80) && 2170b57cec5SDimitry Andric ((*(Position + 2) & 0xC0) == 0x80)) { 2180b57cec5SDimitry Andric uint32_t codepoint = ((*Position & 0x0F) << 12) | 2190b57cec5SDimitry Andric ((*(Position + 1) & 0x3F) << 6) | 2200b57cec5SDimitry Andric (*(Position + 2) & 0x3F); 2210b57cec5SDimitry Andric // Codepoints between 0xD800 and 0xDFFF are invalid, as 2220b57cec5SDimitry Andric // they are high / low surrogate halves used by UTF-16. 2230b57cec5SDimitry Andric if (codepoint >= 0x800 && 2240b57cec5SDimitry Andric (codepoint < 0xD800 || codepoint > 0xDFFF)) 2250b57cec5SDimitry Andric return std::make_pair(codepoint, 3); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric // 4 bytes: [0x10000, 0x10FFFF] 2280b57cec5SDimitry Andric // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 229e8d8bef9SDimitry Andric if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) && 2300b57cec5SDimitry Andric ((*(Position + 1) & 0xC0) == 0x80) && 2310b57cec5SDimitry Andric ((*(Position + 2) & 0xC0) == 0x80) && 2320b57cec5SDimitry Andric ((*(Position + 3) & 0xC0) == 0x80)) { 2330b57cec5SDimitry Andric uint32_t codepoint = ((*Position & 0x07) << 18) | 2340b57cec5SDimitry Andric ((*(Position + 1) & 0x3F) << 12) | 2350b57cec5SDimitry Andric ((*(Position + 2) & 0x3F) << 6) | 2360b57cec5SDimitry Andric (*(Position + 3) & 0x3F); 2370b57cec5SDimitry Andric if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) 2380b57cec5SDimitry Andric return std::make_pair(codepoint, 4); 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric return std::make_pair(0, 0); 2410b57cec5SDimitry Andric } 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric namespace llvm { 2440b57cec5SDimitry Andric namespace yaml { 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric /// Scans YAML tokens from a MemoryBuffer. 2470b57cec5SDimitry Andric class Scanner { 2480b57cec5SDimitry Andric public: 2490b57cec5SDimitry Andric Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, 2500b57cec5SDimitry Andric std::error_code *EC = nullptr); 2510b57cec5SDimitry Andric Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, 2520b57cec5SDimitry Andric std::error_code *EC = nullptr); 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric /// Parse the next token and return it without popping it. 2550b57cec5SDimitry Andric Token &peekNext(); 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric /// Parse the next token and pop it from the queue. 2580b57cec5SDimitry Andric Token getNext(); 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, 261bdd1243dSDimitry Andric ArrayRef<SMRange> Ranges = std::nullopt) { 262bdd1243dSDimitry Andric SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ std::nullopt, 263bdd1243dSDimitry Andric ShowColors); 2640b57cec5SDimitry Andric } 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric void setError(const Twine &Message, StringRef::iterator Position) { 2675ffd83dbSDimitry Andric if (Position >= End) 2685ffd83dbSDimitry Andric Position = End - 1; 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric // propagate the error if possible 2710b57cec5SDimitry Andric if (EC) 2720b57cec5SDimitry Andric *EC = make_error_code(std::errc::invalid_argument); 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric // Don't print out more errors after the first one we encounter. The rest 2750b57cec5SDimitry Andric // are just the result of the first, and have no meaning. 2760b57cec5SDimitry Andric if (!Failed) 2775ffd83dbSDimitry Andric printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message); 2780b57cec5SDimitry Andric Failed = true; 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric /// Returns true if an error occurred while parsing. 2820b57cec5SDimitry Andric bool failed() { 2830b57cec5SDimitry Andric return Failed; 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric private: 2870b57cec5SDimitry Andric void init(MemoryBufferRef Buffer); 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric StringRef currentInput() { 2900b57cec5SDimitry Andric return StringRef(Current, End - Current); 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric /// Decode a UTF-8 minimal well-formed code unit subsequence starting 2940b57cec5SDimitry Andric /// at \a Position. 2950b57cec5SDimitry Andric /// 2960b57cec5SDimitry Andric /// If the UTF-8 code units starting at Position do not form a well-formed 2970b57cec5SDimitry Andric /// code unit subsequence, then the Unicode scalar value is 0, and the length 2980b57cec5SDimitry Andric /// is 0. 2990b57cec5SDimitry Andric UTF8Decoded decodeUTF8(StringRef::iterator Position) { 3000b57cec5SDimitry Andric return ::decodeUTF8(StringRef(Position, End - Position)); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric // The following functions are based on the gramar rules in the YAML spec. The 3040b57cec5SDimitry Andric // style of the function names it meant to closely match how they are written 3050b57cec5SDimitry Andric // in the spec. The number within the [] is the number of the grammar rule in 3060b57cec5SDimitry Andric // the spec. 3070b57cec5SDimitry Andric // 3080b57cec5SDimitry Andric // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. 3090b57cec5SDimitry Andric // 3100b57cec5SDimitry Andric // c- 3110b57cec5SDimitry Andric // A production starting and ending with a special character. 3120b57cec5SDimitry Andric // b- 3130b57cec5SDimitry Andric // A production matching a single line break. 3140b57cec5SDimitry Andric // nb- 3150b57cec5SDimitry Andric // A production starting and ending with a non-break character. 3160b57cec5SDimitry Andric // s- 3170b57cec5SDimitry Andric // A production starting and ending with a white space character. 3180b57cec5SDimitry Andric // ns- 3190b57cec5SDimitry Andric // A production starting and ending with a non-space character. 3200b57cec5SDimitry Andric // l- 3210b57cec5SDimitry Andric // A production matching complete line(s). 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric /// Skip a single nb-char[27] starting at Position. 3240b57cec5SDimitry Andric /// 3250b57cec5SDimitry Andric /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] 3260b57cec5SDimitry Andric /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] 3270b57cec5SDimitry Andric /// 3280b57cec5SDimitry Andric /// @returns The code unit after the nb-char, or Position if it's not an 3290b57cec5SDimitry Andric /// nb-char. 3300b57cec5SDimitry Andric StringRef::iterator skip_nb_char(StringRef::iterator Position); 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric /// Skip a single b-break[28] starting at Position. 3330b57cec5SDimitry Andric /// 3340b57cec5SDimitry Andric /// A b-break is 0xD 0xA | 0xD | 0xA 3350b57cec5SDimitry Andric /// 3360b57cec5SDimitry Andric /// @returns The code unit after the b-break, or Position if it's not a 3370b57cec5SDimitry Andric /// b-break. 3380b57cec5SDimitry Andric StringRef::iterator skip_b_break(StringRef::iterator Position); 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric /// Skip a single s-space[31] starting at Position. 3410b57cec5SDimitry Andric /// 3420b57cec5SDimitry Andric /// An s-space is 0x20 3430b57cec5SDimitry Andric /// 3440b57cec5SDimitry Andric /// @returns The code unit after the s-space, or Position if it's not a 3450b57cec5SDimitry Andric /// s-space. 3460b57cec5SDimitry Andric StringRef::iterator skip_s_space(StringRef::iterator Position); 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric /// Skip a single s-white[33] starting at Position. 3490b57cec5SDimitry Andric /// 3500b57cec5SDimitry Andric /// A s-white is 0x20 | 0x9 3510b57cec5SDimitry Andric /// 3520b57cec5SDimitry Andric /// @returns The code unit after the s-white, or Position if it's not a 3530b57cec5SDimitry Andric /// s-white. 3540b57cec5SDimitry Andric StringRef::iterator skip_s_white(StringRef::iterator Position); 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric /// Skip a single ns-char[34] starting at Position. 3570b57cec5SDimitry Andric /// 3580b57cec5SDimitry Andric /// A ns-char is nb-char - s-white 3590b57cec5SDimitry Andric /// 3600b57cec5SDimitry Andric /// @returns The code unit after the ns-char, or Position if it's not a 3610b57cec5SDimitry Andric /// ns-char. 3620b57cec5SDimitry Andric StringRef::iterator skip_ns_char(StringRef::iterator Position); 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric /// Skip minimal well-formed code unit subsequences until Func 3670b57cec5SDimitry Andric /// returns its input. 3680b57cec5SDimitry Andric /// 3690b57cec5SDimitry Andric /// @returns The code unit after the last minimal well-formed code unit 3700b57cec5SDimitry Andric /// subsequence that Func accepted. 3710b57cec5SDimitry Andric StringRef::iterator skip_while( SkipWhileFunc Func 3720b57cec5SDimitry Andric , StringRef::iterator Position); 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric /// Skip minimal well-formed code unit subsequences until Func returns its 3750b57cec5SDimitry Andric /// input. 3760b57cec5SDimitry Andric void advanceWhile(SkipWhileFunc Func); 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric /// Scan ns-uri-char[39]s starting at Cur. 3790b57cec5SDimitry Andric /// 3800b57cec5SDimitry Andric /// This updates Cur and Column while scanning. 3810b57cec5SDimitry Andric void scan_ns_uri_char(); 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric /// Consume a minimal well-formed code unit subsequence starting at 3840b57cec5SDimitry Andric /// \a Cur. Return false if it is not the same Unicode scalar value as 3850b57cec5SDimitry Andric /// \a Expected. This updates \a Column. 3860b57cec5SDimitry Andric bool consume(uint32_t Expected); 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. 3890b57cec5SDimitry Andric void skip(uint32_t Distance); 3900b57cec5SDimitry Andric 3910b57cec5SDimitry Andric /// Return true if the minimal well-formed code unit subsequence at 3920b57cec5SDimitry Andric /// Pos is whitespace or a new line 3930b57cec5SDimitry Andric bool isBlankOrBreak(StringRef::iterator Position); 3940b57cec5SDimitry Andric 395*5f757f3fSDimitry Andric /// Return true if the minimal well-formed code unit subsequence at 396*5f757f3fSDimitry Andric /// Pos is considered a "safe" character for plain scalars. 397*5f757f3fSDimitry Andric bool isPlainSafeNonBlank(StringRef::iterator Position); 398*5f757f3fSDimitry Andric 39981ad6265SDimitry Andric /// Return true if the line is a line break, false otherwise. 40081ad6265SDimitry Andric bool isLineEmpty(StringRef Line); 40181ad6265SDimitry Andric 4020b57cec5SDimitry Andric /// Consume a single b-break[28] if it's present at the current position. 4030b57cec5SDimitry Andric /// 4040b57cec5SDimitry Andric /// Return false if the code unit at the current position isn't a line break. 4050b57cec5SDimitry Andric bool consumeLineBreakIfPresent(); 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. 4080b57cec5SDimitry Andric void saveSimpleKeyCandidate( TokenQueueT::iterator Tok 4090b57cec5SDimitry Andric , unsigned AtColumn 4100b57cec5SDimitry Andric , bool IsRequired); 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric /// Remove simple keys that can no longer be valid simple keys. 4130b57cec5SDimitry Andric /// 4140b57cec5SDimitry Andric /// Invalid simple keys are not on the current line or are further than 1024 4150b57cec5SDimitry Andric /// columns back. 4160b57cec5SDimitry Andric void removeStaleSimpleKeyCandidates(); 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric /// Remove all simple keys on FlowLevel \a Level. 4190b57cec5SDimitry Andric void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd 4220b57cec5SDimitry Andric /// tokens if needed. 4230b57cec5SDimitry Andric bool unrollIndent(int ToColumn); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint 4260b57cec5SDimitry Andric /// if needed. 4270b57cec5SDimitry Andric bool rollIndent( int ToColumn 4280b57cec5SDimitry Andric , Token::TokenKind Kind 4290b57cec5SDimitry Andric , TokenQueueT::iterator InsertPoint); 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric /// Skip a single-line comment when the comment starts at the current 4320b57cec5SDimitry Andric /// position of the scanner. 4330b57cec5SDimitry Andric void skipComment(); 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric /// Skip whitespace and comments until the start of the next token. 4360b57cec5SDimitry Andric void scanToNextToken(); 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric /// Must be the first token generated. 4390b57cec5SDimitry Andric bool scanStreamStart(); 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric /// Generate tokens needed to close out the stream. 4420b57cec5SDimitry Andric bool scanStreamEnd(); 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric /// Scan a %BLAH directive. 4450b57cec5SDimitry Andric bool scanDirective(); 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric /// Scan a ... or ---. 4480b57cec5SDimitry Andric bool scanDocumentIndicator(bool IsStart); 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric /// Scan a [ or { and generate the proper flow collection start token. 4510b57cec5SDimitry Andric bool scanFlowCollectionStart(bool IsSequence); 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric /// Scan a ] or } and generate the proper flow collection end token. 4540b57cec5SDimitry Andric bool scanFlowCollectionEnd(bool IsSequence); 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric /// Scan the , that separates entries in a flow collection. 4570b57cec5SDimitry Andric bool scanFlowEntry(); 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric /// Scan the - that starts block sequence entries. 4600b57cec5SDimitry Andric bool scanBlockEntry(); 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric /// Scan an explicit ? indicating a key. 4630b57cec5SDimitry Andric bool scanKey(); 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric /// Scan an explicit : indicating a value. 4660b57cec5SDimitry Andric bool scanValue(); 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric /// Scan a quoted scalar. 4690b57cec5SDimitry Andric bool scanFlowScalar(bool IsDoubleQuoted); 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric /// Scan an unquoted scalar. 4720b57cec5SDimitry Andric bool scanPlainScalar(); 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric /// Scan an Alias or Anchor starting with * or &. 4750b57cec5SDimitry Andric bool scanAliasOrAnchor(bool IsAlias); 4760b57cec5SDimitry Andric 4770b57cec5SDimitry Andric /// Scan a block scalar starting with | or >. 4780b57cec5SDimitry Andric bool scanBlockScalar(bool IsLiteral); 4790b57cec5SDimitry Andric 48081ad6265SDimitry Andric /// Scan a block scalar style indicator and header. 48181ad6265SDimitry Andric /// 48281ad6265SDimitry Andric /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that 48381ad6265SDimitry Andric /// YAML does not consider the style indicator to be a part of the header. 48481ad6265SDimitry Andric /// 48581ad6265SDimitry Andric /// Return false if an error occurred. 48681ad6265SDimitry Andric bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator, 48781ad6265SDimitry Andric unsigned &IndentIndicator, bool &IsDone); 48881ad6265SDimitry Andric 48981ad6265SDimitry Andric /// Scan a style indicator in a block scalar header. 49081ad6265SDimitry Andric char scanBlockStyleIndicator(); 49181ad6265SDimitry Andric 4920b57cec5SDimitry Andric /// Scan a chomping indicator in a block scalar header. 4930b57cec5SDimitry Andric char scanBlockChompingIndicator(); 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric /// Scan an indentation indicator in a block scalar header. 4960b57cec5SDimitry Andric unsigned scanBlockIndentationIndicator(); 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andric /// Scan a block scalar header. 4990b57cec5SDimitry Andric /// 5000b57cec5SDimitry Andric /// Return false if an error occurred. 5010b57cec5SDimitry Andric bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, 5020b57cec5SDimitry Andric bool &IsDone); 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric /// Look for the indentation level of a block scalar. 5050b57cec5SDimitry Andric /// 5060b57cec5SDimitry Andric /// Return false if an error occurred. 5070b57cec5SDimitry Andric bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, 5080b57cec5SDimitry Andric unsigned &LineBreaks, bool &IsDone); 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric /// Scan the indentation of a text line in a block scalar. 5110b57cec5SDimitry Andric /// 5120b57cec5SDimitry Andric /// Return false if an error occurred. 5130b57cec5SDimitry Andric bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, 5140b57cec5SDimitry Andric bool &IsDone); 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric /// Scan a tag of the form !stuff. 5170b57cec5SDimitry Andric bool scanTag(); 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric /// Dispatch to the next scanning function based on \a *Cur. 5200b57cec5SDimitry Andric bool fetchMoreTokens(); 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric /// The SourceMgr used for diagnostics and buffer management. 5230b57cec5SDimitry Andric SourceMgr &SM; 5240b57cec5SDimitry Andric 5250b57cec5SDimitry Andric /// The original input. 5260b57cec5SDimitry Andric MemoryBufferRef InputBuffer; 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric /// The current position of the scanner. 5290b57cec5SDimitry Andric StringRef::iterator Current; 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric /// The end of the input (one past the last character). 5320b57cec5SDimitry Andric StringRef::iterator End; 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric /// Current YAML indentation level in spaces. 5350b57cec5SDimitry Andric int Indent; 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric /// Current column number in Unicode code points. 5380b57cec5SDimitry Andric unsigned Column; 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric /// Current line number. 5410b57cec5SDimitry Andric unsigned Line; 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric /// How deep we are in flow style containers. 0 Means at block level. 5440b57cec5SDimitry Andric unsigned FlowLevel; 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric /// Are we at the start of the stream? 5470b57cec5SDimitry Andric bool IsStartOfStream; 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric /// Can the next token be the start of a simple key? 5500b57cec5SDimitry Andric bool IsSimpleKeyAllowed; 5510b57cec5SDimitry Andric 552*5f757f3fSDimitry Andric /// Can the next token be a value indicator even if it does not have a 553*5f757f3fSDimitry Andric /// trailing space? 554*5f757f3fSDimitry Andric bool IsAdjacentValueAllowedInFlow; 555*5f757f3fSDimitry Andric 5560b57cec5SDimitry Andric /// True if an error has occurred. 5570b57cec5SDimitry Andric bool Failed; 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric /// Should colors be used when printing out the diagnostic messages? 5600b57cec5SDimitry Andric bool ShowColors; 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric /// Queue of tokens. This is required to queue up tokens while looking 5630b57cec5SDimitry Andric /// for the end of a simple key. And for cases where a single character 5640b57cec5SDimitry Andric /// can produce multiple tokens (e.g. BlockEnd). 5650b57cec5SDimitry Andric TokenQueueT TokenQueue; 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric /// Indentation levels. 5680b57cec5SDimitry Andric SmallVector<int, 4> Indents; 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric /// Potential simple keys. 5710b57cec5SDimitry Andric SmallVector<SimpleKey, 4> SimpleKeys; 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric std::error_code *EC; 5740b57cec5SDimitry Andric }; 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric } // end namespace yaml 5770b57cec5SDimitry Andric } // end namespace llvm 5780b57cec5SDimitry Andric 5790b57cec5SDimitry Andric /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. 5800b57cec5SDimitry Andric static void encodeUTF8( uint32_t UnicodeScalarValue 5810b57cec5SDimitry Andric , SmallVectorImpl<char> &Result) { 5820b57cec5SDimitry Andric if (UnicodeScalarValue <= 0x7F) { 5830b57cec5SDimitry Andric Result.push_back(UnicodeScalarValue & 0x7F); 5840b57cec5SDimitry Andric } else if (UnicodeScalarValue <= 0x7FF) { 5850b57cec5SDimitry Andric uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); 5860b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); 5870b57cec5SDimitry Andric Result.push_back(FirstByte); 5880b57cec5SDimitry Andric Result.push_back(SecondByte); 5890b57cec5SDimitry Andric } else if (UnicodeScalarValue <= 0xFFFF) { 5900b57cec5SDimitry Andric uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); 5910b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 5920b57cec5SDimitry Andric uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); 5930b57cec5SDimitry Andric Result.push_back(FirstByte); 5940b57cec5SDimitry Andric Result.push_back(SecondByte); 5950b57cec5SDimitry Andric Result.push_back(ThirdByte); 5960b57cec5SDimitry Andric } else if (UnicodeScalarValue <= 0x10FFFF) { 5970b57cec5SDimitry Andric uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); 5980b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); 5990b57cec5SDimitry Andric uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 6000b57cec5SDimitry Andric uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); 6010b57cec5SDimitry Andric Result.push_back(FirstByte); 6020b57cec5SDimitry Andric Result.push_back(SecondByte); 6030b57cec5SDimitry Andric Result.push_back(ThirdByte); 6040b57cec5SDimitry Andric Result.push_back(FourthByte); 6050b57cec5SDimitry Andric } 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { 6090b57cec5SDimitry Andric SourceMgr SM; 6100b57cec5SDimitry Andric Scanner scanner(Input, SM); 6110b57cec5SDimitry Andric while (true) { 6120b57cec5SDimitry Andric Token T = scanner.getNext(); 6130b57cec5SDimitry Andric switch (T.Kind) { 6140b57cec5SDimitry Andric case Token::TK_StreamStart: 6150b57cec5SDimitry Andric OS << "Stream-Start: "; 6160b57cec5SDimitry Andric break; 6170b57cec5SDimitry Andric case Token::TK_StreamEnd: 6180b57cec5SDimitry Andric OS << "Stream-End: "; 6190b57cec5SDimitry Andric break; 6200b57cec5SDimitry Andric case Token::TK_VersionDirective: 6210b57cec5SDimitry Andric OS << "Version-Directive: "; 6220b57cec5SDimitry Andric break; 6230b57cec5SDimitry Andric case Token::TK_TagDirective: 6240b57cec5SDimitry Andric OS << "Tag-Directive: "; 6250b57cec5SDimitry Andric break; 6260b57cec5SDimitry Andric case Token::TK_DocumentStart: 6270b57cec5SDimitry Andric OS << "Document-Start: "; 6280b57cec5SDimitry Andric break; 6290b57cec5SDimitry Andric case Token::TK_DocumentEnd: 6300b57cec5SDimitry Andric OS << "Document-End: "; 6310b57cec5SDimitry Andric break; 6320b57cec5SDimitry Andric case Token::TK_BlockEntry: 6330b57cec5SDimitry Andric OS << "Block-Entry: "; 6340b57cec5SDimitry Andric break; 6350b57cec5SDimitry Andric case Token::TK_BlockEnd: 6360b57cec5SDimitry Andric OS << "Block-End: "; 6370b57cec5SDimitry Andric break; 6380b57cec5SDimitry Andric case Token::TK_BlockSequenceStart: 6390b57cec5SDimitry Andric OS << "Block-Sequence-Start: "; 6400b57cec5SDimitry Andric break; 6410b57cec5SDimitry Andric case Token::TK_BlockMappingStart: 6420b57cec5SDimitry Andric OS << "Block-Mapping-Start: "; 6430b57cec5SDimitry Andric break; 6440b57cec5SDimitry Andric case Token::TK_FlowEntry: 6450b57cec5SDimitry Andric OS << "Flow-Entry: "; 6460b57cec5SDimitry Andric break; 6470b57cec5SDimitry Andric case Token::TK_FlowSequenceStart: 6480b57cec5SDimitry Andric OS << "Flow-Sequence-Start: "; 6490b57cec5SDimitry Andric break; 6500b57cec5SDimitry Andric case Token::TK_FlowSequenceEnd: 6510b57cec5SDimitry Andric OS << "Flow-Sequence-End: "; 6520b57cec5SDimitry Andric break; 6530b57cec5SDimitry Andric case Token::TK_FlowMappingStart: 6540b57cec5SDimitry Andric OS << "Flow-Mapping-Start: "; 6550b57cec5SDimitry Andric break; 6560b57cec5SDimitry Andric case Token::TK_FlowMappingEnd: 6570b57cec5SDimitry Andric OS << "Flow-Mapping-End: "; 6580b57cec5SDimitry Andric break; 6590b57cec5SDimitry Andric case Token::TK_Key: 6600b57cec5SDimitry Andric OS << "Key: "; 6610b57cec5SDimitry Andric break; 6620b57cec5SDimitry Andric case Token::TK_Value: 6630b57cec5SDimitry Andric OS << "Value: "; 6640b57cec5SDimitry Andric break; 6650b57cec5SDimitry Andric case Token::TK_Scalar: 6660b57cec5SDimitry Andric OS << "Scalar: "; 6670b57cec5SDimitry Andric break; 6680b57cec5SDimitry Andric case Token::TK_BlockScalar: 6690b57cec5SDimitry Andric OS << "Block Scalar: "; 6700b57cec5SDimitry Andric break; 6710b57cec5SDimitry Andric case Token::TK_Alias: 6720b57cec5SDimitry Andric OS << "Alias: "; 6730b57cec5SDimitry Andric break; 6740b57cec5SDimitry Andric case Token::TK_Anchor: 6750b57cec5SDimitry Andric OS << "Anchor: "; 6760b57cec5SDimitry Andric break; 6770b57cec5SDimitry Andric case Token::TK_Tag: 6780b57cec5SDimitry Andric OS << "Tag: "; 6790b57cec5SDimitry Andric break; 6800b57cec5SDimitry Andric case Token::TK_Error: 6810b57cec5SDimitry Andric break; 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric OS << T.Range << "\n"; 6840b57cec5SDimitry Andric if (T.Kind == Token::TK_StreamEnd) 6850b57cec5SDimitry Andric break; 6860b57cec5SDimitry Andric else if (T.Kind == Token::TK_Error) 6870b57cec5SDimitry Andric return false; 6880b57cec5SDimitry Andric } 6890b57cec5SDimitry Andric return true; 6900b57cec5SDimitry Andric } 6910b57cec5SDimitry Andric 6920b57cec5SDimitry Andric bool yaml::scanTokens(StringRef Input) { 6930b57cec5SDimitry Andric SourceMgr SM; 6940b57cec5SDimitry Andric Scanner scanner(Input, SM); 6950b57cec5SDimitry Andric while (true) { 6960b57cec5SDimitry Andric Token T = scanner.getNext(); 6970b57cec5SDimitry Andric if (T.Kind == Token::TK_StreamEnd) 6980b57cec5SDimitry Andric break; 6990b57cec5SDimitry Andric else if (T.Kind == Token::TK_Error) 7000b57cec5SDimitry Andric return false; 7010b57cec5SDimitry Andric } 7020b57cec5SDimitry Andric return true; 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric 7050b57cec5SDimitry Andric std::string yaml::escape(StringRef Input, bool EscapePrintable) { 7060b57cec5SDimitry Andric std::string EscapedInput; 7070b57cec5SDimitry Andric for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { 7080b57cec5SDimitry Andric if (*i == '\\') 7090b57cec5SDimitry Andric EscapedInput += "\\\\"; 7100b57cec5SDimitry Andric else if (*i == '"') 7110b57cec5SDimitry Andric EscapedInput += "\\\""; 7120b57cec5SDimitry Andric else if (*i == 0) 7130b57cec5SDimitry Andric EscapedInput += "\\0"; 7140b57cec5SDimitry Andric else if (*i == 0x07) 7150b57cec5SDimitry Andric EscapedInput += "\\a"; 7160b57cec5SDimitry Andric else if (*i == 0x08) 7170b57cec5SDimitry Andric EscapedInput += "\\b"; 7180b57cec5SDimitry Andric else if (*i == 0x09) 7190b57cec5SDimitry Andric EscapedInput += "\\t"; 7200b57cec5SDimitry Andric else if (*i == 0x0A) 7210b57cec5SDimitry Andric EscapedInput += "\\n"; 7220b57cec5SDimitry Andric else if (*i == 0x0B) 7230b57cec5SDimitry Andric EscapedInput += "\\v"; 7240b57cec5SDimitry Andric else if (*i == 0x0C) 7250b57cec5SDimitry Andric EscapedInput += "\\f"; 7260b57cec5SDimitry Andric else if (*i == 0x0D) 7270b57cec5SDimitry Andric EscapedInput += "\\r"; 7280b57cec5SDimitry Andric else if (*i == 0x1B) 7290b57cec5SDimitry Andric EscapedInput += "\\e"; 7300b57cec5SDimitry Andric else if ((unsigned char)*i < 0x20) { // Control characters not handled above. 7310b57cec5SDimitry Andric std::string HexStr = utohexstr(*i); 7320b57cec5SDimitry Andric EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 7330b57cec5SDimitry Andric } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. 7340b57cec5SDimitry Andric UTF8Decoded UnicodeScalarValue 7350b57cec5SDimitry Andric = decodeUTF8(StringRef(i, Input.end() - i)); 7360b57cec5SDimitry Andric if (UnicodeScalarValue.second == 0) { 7370b57cec5SDimitry Andric // Found invalid char. 7380b57cec5SDimitry Andric SmallString<4> Val; 7390b57cec5SDimitry Andric encodeUTF8(0xFFFD, Val); 740e8d8bef9SDimitry Andric llvm::append_range(EscapedInput, Val); 7410b57cec5SDimitry Andric // FIXME: Error reporting. 7420b57cec5SDimitry Andric return EscapedInput; 7430b57cec5SDimitry Andric } 7440b57cec5SDimitry Andric if (UnicodeScalarValue.first == 0x85) 7450b57cec5SDimitry Andric EscapedInput += "\\N"; 7460b57cec5SDimitry Andric else if (UnicodeScalarValue.first == 0xA0) 7470b57cec5SDimitry Andric EscapedInput += "\\_"; 7480b57cec5SDimitry Andric else if (UnicodeScalarValue.first == 0x2028) 7490b57cec5SDimitry Andric EscapedInput += "\\L"; 7500b57cec5SDimitry Andric else if (UnicodeScalarValue.first == 0x2029) 7510b57cec5SDimitry Andric EscapedInput += "\\P"; 7520b57cec5SDimitry Andric else if (!EscapePrintable && 7530b57cec5SDimitry Andric sys::unicode::isPrintable(UnicodeScalarValue.first)) 7540b57cec5SDimitry Andric EscapedInput += StringRef(i, UnicodeScalarValue.second); 7550b57cec5SDimitry Andric else { 7560b57cec5SDimitry Andric std::string HexStr = utohexstr(UnicodeScalarValue.first); 7570b57cec5SDimitry Andric if (HexStr.size() <= 2) 7580b57cec5SDimitry Andric EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 7590b57cec5SDimitry Andric else if (HexStr.size() <= 4) 7600b57cec5SDimitry Andric EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; 7610b57cec5SDimitry Andric else if (HexStr.size() <= 8) 7620b57cec5SDimitry Andric EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; 7630b57cec5SDimitry Andric } 7640b57cec5SDimitry Andric i += UnicodeScalarValue.second - 1; 7650b57cec5SDimitry Andric } else 7660b57cec5SDimitry Andric EscapedInput.push_back(*i); 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric return EscapedInput; 7690b57cec5SDimitry Andric } 7700b57cec5SDimitry Andric 771bdd1243dSDimitry Andric std::optional<bool> yaml::parseBool(StringRef S) { 772e8d8bef9SDimitry Andric switch (S.size()) { 773e8d8bef9SDimitry Andric case 1: 774e8d8bef9SDimitry Andric switch (S.front()) { 775e8d8bef9SDimitry Andric case 'y': 776e8d8bef9SDimitry Andric case 'Y': 777e8d8bef9SDimitry Andric return true; 778e8d8bef9SDimitry Andric case 'n': 779e8d8bef9SDimitry Andric case 'N': 780e8d8bef9SDimitry Andric return false; 781e8d8bef9SDimitry Andric default: 782bdd1243dSDimitry Andric return std::nullopt; 783e8d8bef9SDimitry Andric } 784e8d8bef9SDimitry Andric case 2: 785e8d8bef9SDimitry Andric switch (S.front()) { 786e8d8bef9SDimitry Andric case 'O': 787e8d8bef9SDimitry Andric if (S[1] == 'N') // ON 788e8d8bef9SDimitry Andric return true; 789bdd1243dSDimitry Andric [[fallthrough]]; 790e8d8bef9SDimitry Andric case 'o': 791e8d8bef9SDimitry Andric if (S[1] == 'n') //[Oo]n 792e8d8bef9SDimitry Andric return true; 793bdd1243dSDimitry Andric return std::nullopt; 794e8d8bef9SDimitry Andric case 'N': 795e8d8bef9SDimitry Andric if (S[1] == 'O') // NO 796e8d8bef9SDimitry Andric return false; 797bdd1243dSDimitry Andric [[fallthrough]]; 798e8d8bef9SDimitry Andric case 'n': 799e8d8bef9SDimitry Andric if (S[1] == 'o') //[Nn]o 800e8d8bef9SDimitry Andric return false; 801bdd1243dSDimitry Andric return std::nullopt; 802e8d8bef9SDimitry Andric default: 803bdd1243dSDimitry Andric return std::nullopt; 804e8d8bef9SDimitry Andric } 805e8d8bef9SDimitry Andric case 3: 806e8d8bef9SDimitry Andric switch (S.front()) { 807e8d8bef9SDimitry Andric case 'O': 808e8d8bef9SDimitry Andric if (S.drop_front() == "FF") // OFF 809e8d8bef9SDimitry Andric return false; 810bdd1243dSDimitry Andric [[fallthrough]]; 811e8d8bef9SDimitry Andric case 'o': 812e8d8bef9SDimitry Andric if (S.drop_front() == "ff") //[Oo]ff 813e8d8bef9SDimitry Andric return false; 814bdd1243dSDimitry Andric return std::nullopt; 815e8d8bef9SDimitry Andric case 'Y': 816e8d8bef9SDimitry Andric if (S.drop_front() == "ES") // YES 817e8d8bef9SDimitry Andric return true; 818bdd1243dSDimitry Andric [[fallthrough]]; 819e8d8bef9SDimitry Andric case 'y': 820e8d8bef9SDimitry Andric if (S.drop_front() == "es") //[Yy]es 821e8d8bef9SDimitry Andric return true; 822bdd1243dSDimitry Andric return std::nullopt; 823e8d8bef9SDimitry Andric default: 824bdd1243dSDimitry Andric return std::nullopt; 825e8d8bef9SDimitry Andric } 826e8d8bef9SDimitry Andric case 4: 827e8d8bef9SDimitry Andric switch (S.front()) { 828e8d8bef9SDimitry Andric case 'T': 829e8d8bef9SDimitry Andric if (S.drop_front() == "RUE") // TRUE 830e8d8bef9SDimitry Andric return true; 831bdd1243dSDimitry Andric [[fallthrough]]; 832e8d8bef9SDimitry Andric case 't': 833e8d8bef9SDimitry Andric if (S.drop_front() == "rue") //[Tt]rue 834e8d8bef9SDimitry Andric return true; 835bdd1243dSDimitry Andric return std::nullopt; 836e8d8bef9SDimitry Andric default: 837bdd1243dSDimitry Andric return std::nullopt; 838e8d8bef9SDimitry Andric } 839e8d8bef9SDimitry Andric case 5: 840e8d8bef9SDimitry Andric switch (S.front()) { 841e8d8bef9SDimitry Andric case 'F': 842e8d8bef9SDimitry Andric if (S.drop_front() == "ALSE") // FALSE 843e8d8bef9SDimitry Andric return false; 844bdd1243dSDimitry Andric [[fallthrough]]; 845e8d8bef9SDimitry Andric case 'f': 846e8d8bef9SDimitry Andric if (S.drop_front() == "alse") //[Ff]alse 847e8d8bef9SDimitry Andric return false; 848bdd1243dSDimitry Andric return std::nullopt; 849e8d8bef9SDimitry Andric default: 850bdd1243dSDimitry Andric return std::nullopt; 851e8d8bef9SDimitry Andric } 852e8d8bef9SDimitry Andric default: 853bdd1243dSDimitry Andric return std::nullopt; 854e8d8bef9SDimitry Andric } 855e8d8bef9SDimitry Andric } 856e8d8bef9SDimitry Andric 8570b57cec5SDimitry Andric Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, 8580b57cec5SDimitry Andric std::error_code *EC) 8590b57cec5SDimitry Andric : SM(sm), ShowColors(ShowColors), EC(EC) { 8600b57cec5SDimitry Andric init(MemoryBufferRef(Input, "YAML")); 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, 8640b57cec5SDimitry Andric std::error_code *EC) 8650b57cec5SDimitry Andric : SM(SM_), ShowColors(ShowColors), EC(EC) { 8660b57cec5SDimitry Andric init(Buffer); 8670b57cec5SDimitry Andric } 8680b57cec5SDimitry Andric 8690b57cec5SDimitry Andric void Scanner::init(MemoryBufferRef Buffer) { 8700b57cec5SDimitry Andric InputBuffer = Buffer; 8710b57cec5SDimitry Andric Current = InputBuffer.getBufferStart(); 8720b57cec5SDimitry Andric End = InputBuffer.getBufferEnd(); 8730b57cec5SDimitry Andric Indent = -1; 8740b57cec5SDimitry Andric Column = 0; 8750b57cec5SDimitry Andric Line = 0; 8760b57cec5SDimitry Andric FlowLevel = 0; 8770b57cec5SDimitry Andric IsStartOfStream = true; 8780b57cec5SDimitry Andric IsSimpleKeyAllowed = true; 879*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 8800b57cec5SDimitry Andric Failed = false; 8810b57cec5SDimitry Andric std::unique_ptr<MemoryBuffer> InputBufferOwner = 882e8d8bef9SDimitry Andric MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false); 8830b57cec5SDimitry Andric SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); 8840b57cec5SDimitry Andric } 8850b57cec5SDimitry Andric 8860b57cec5SDimitry Andric Token &Scanner::peekNext() { 8870b57cec5SDimitry Andric // If the current token is a possible simple key, keep parsing until we 8880b57cec5SDimitry Andric // can confirm. 8890b57cec5SDimitry Andric bool NeedMore = false; 8900b57cec5SDimitry Andric while (true) { 8910b57cec5SDimitry Andric if (TokenQueue.empty() || NeedMore) { 8920b57cec5SDimitry Andric if (!fetchMoreTokens()) { 8930b57cec5SDimitry Andric TokenQueue.clear(); 894480093f4SDimitry Andric SimpleKeys.clear(); 8950b57cec5SDimitry Andric TokenQueue.push_back(Token()); 8960b57cec5SDimitry Andric return TokenQueue.front(); 8970b57cec5SDimitry Andric } 8980b57cec5SDimitry Andric } 8990b57cec5SDimitry Andric assert(!TokenQueue.empty() && 9000b57cec5SDimitry Andric "fetchMoreTokens lied about getting tokens!"); 9010b57cec5SDimitry Andric 9020b57cec5SDimitry Andric removeStaleSimpleKeyCandidates(); 9030b57cec5SDimitry Andric SimpleKey SK; 9040b57cec5SDimitry Andric SK.Tok = TokenQueue.begin(); 9050b57cec5SDimitry Andric if (!is_contained(SimpleKeys, SK)) 9060b57cec5SDimitry Andric break; 9070b57cec5SDimitry Andric else 9080b57cec5SDimitry Andric NeedMore = true; 9090b57cec5SDimitry Andric } 9100b57cec5SDimitry Andric return TokenQueue.front(); 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric Token Scanner::getNext() { 9140b57cec5SDimitry Andric Token Ret = peekNext(); 9150b57cec5SDimitry Andric // TokenQueue can be empty if there was an error getting the next token. 9160b57cec5SDimitry Andric if (!TokenQueue.empty()) 9170b57cec5SDimitry Andric TokenQueue.pop_front(); 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric // There cannot be any referenced Token's if the TokenQueue is empty. So do a 9200b57cec5SDimitry Andric // quick deallocation of them all. 9210b57cec5SDimitry Andric if (TokenQueue.empty()) 9220b57cec5SDimitry Andric TokenQueue.resetAlloc(); 9230b57cec5SDimitry Andric 9240b57cec5SDimitry Andric return Ret; 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { 9280b57cec5SDimitry Andric if (Position == End) 9290b57cec5SDimitry Andric return Position; 9300b57cec5SDimitry Andric // Check 7 bit c-printable - b-char. 9310b57cec5SDimitry Andric if ( *Position == 0x09 9320b57cec5SDimitry Andric || (*Position >= 0x20 && *Position <= 0x7E)) 9330b57cec5SDimitry Andric return Position + 1; 9340b57cec5SDimitry Andric 9350b57cec5SDimitry Andric // Check for valid UTF-8. 9360b57cec5SDimitry Andric if (uint8_t(*Position) & 0x80) { 9370b57cec5SDimitry Andric UTF8Decoded u8d = decodeUTF8(Position); 9380b57cec5SDimitry Andric if ( u8d.second != 0 9390b57cec5SDimitry Andric && u8d.first != 0xFEFF 9400b57cec5SDimitry Andric && ( u8d.first == 0x85 9410b57cec5SDimitry Andric || ( u8d.first >= 0xA0 9420b57cec5SDimitry Andric && u8d.first <= 0xD7FF) 9430b57cec5SDimitry Andric || ( u8d.first >= 0xE000 9440b57cec5SDimitry Andric && u8d.first <= 0xFFFD) 9450b57cec5SDimitry Andric || ( u8d.first >= 0x10000 9460b57cec5SDimitry Andric && u8d.first <= 0x10FFFF))) 9470b57cec5SDimitry Andric return Position + u8d.second; 9480b57cec5SDimitry Andric } 9490b57cec5SDimitry Andric return Position; 9500b57cec5SDimitry Andric } 9510b57cec5SDimitry Andric 9520b57cec5SDimitry Andric StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { 9530b57cec5SDimitry Andric if (Position == End) 9540b57cec5SDimitry Andric return Position; 9550b57cec5SDimitry Andric if (*Position == 0x0D) { 9560b57cec5SDimitry Andric if (Position + 1 != End && *(Position + 1) == 0x0A) 9570b57cec5SDimitry Andric return Position + 2; 9580b57cec5SDimitry Andric return Position + 1; 9590b57cec5SDimitry Andric } 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric if (*Position == 0x0A) 9620b57cec5SDimitry Andric return Position + 1; 9630b57cec5SDimitry Andric return Position; 9640b57cec5SDimitry Andric } 9650b57cec5SDimitry Andric 9660b57cec5SDimitry Andric StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { 9670b57cec5SDimitry Andric if (Position == End) 9680b57cec5SDimitry Andric return Position; 9690b57cec5SDimitry Andric if (*Position == ' ') 9700b57cec5SDimitry Andric return Position + 1; 9710b57cec5SDimitry Andric return Position; 9720b57cec5SDimitry Andric } 9730b57cec5SDimitry Andric 9740b57cec5SDimitry Andric StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { 9750b57cec5SDimitry Andric if (Position == End) 9760b57cec5SDimitry Andric return Position; 9770b57cec5SDimitry Andric if (*Position == ' ' || *Position == '\t') 9780b57cec5SDimitry Andric return Position + 1; 9790b57cec5SDimitry Andric return Position; 9800b57cec5SDimitry Andric } 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { 9830b57cec5SDimitry Andric if (Position == End) 9840b57cec5SDimitry Andric return Position; 9850b57cec5SDimitry Andric if (*Position == ' ' || *Position == '\t') 9860b57cec5SDimitry Andric return Position; 9870b57cec5SDimitry Andric return skip_nb_char(Position); 9880b57cec5SDimitry Andric } 9890b57cec5SDimitry Andric 9900b57cec5SDimitry Andric StringRef::iterator Scanner::skip_while( SkipWhileFunc Func 9910b57cec5SDimitry Andric , StringRef::iterator Position) { 9920b57cec5SDimitry Andric while (true) { 9930b57cec5SDimitry Andric StringRef::iterator i = (this->*Func)(Position); 9940b57cec5SDimitry Andric if (i == Position) 9950b57cec5SDimitry Andric break; 9960b57cec5SDimitry Andric Position = i; 9970b57cec5SDimitry Andric } 9980b57cec5SDimitry Andric return Position; 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric void Scanner::advanceWhile(SkipWhileFunc Func) { 10020b57cec5SDimitry Andric auto Final = skip_while(Func, Current); 10030b57cec5SDimitry Andric Column += Final - Current; 10040b57cec5SDimitry Andric Current = Final; 10050b57cec5SDimitry Andric } 10060b57cec5SDimitry Andric 1007e8d8bef9SDimitry Andric static bool is_ns_hex_digit(const char C) { return isAlnum(C); } 10080b57cec5SDimitry Andric 1009e8d8bef9SDimitry Andric static bool is_ns_word_char(const char C) { return C == '-' || isAlpha(C); } 10100b57cec5SDimitry Andric 10110b57cec5SDimitry Andric void Scanner::scan_ns_uri_char() { 10120b57cec5SDimitry Andric while (true) { 10130b57cec5SDimitry Andric if (Current == End) 10140b57cec5SDimitry Andric break; 10150b57cec5SDimitry Andric if (( *Current == '%' 10160b57cec5SDimitry Andric && Current + 2 < End 10170b57cec5SDimitry Andric && is_ns_hex_digit(*(Current + 1)) 10180b57cec5SDimitry Andric && is_ns_hex_digit(*(Current + 2))) 10190b57cec5SDimitry Andric || is_ns_word_char(*Current) 10200b57cec5SDimitry Andric || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") 10210b57cec5SDimitry Andric != StringRef::npos) { 10220b57cec5SDimitry Andric ++Current; 10230b57cec5SDimitry Andric ++Column; 10240b57cec5SDimitry Andric } else 10250b57cec5SDimitry Andric break; 10260b57cec5SDimitry Andric } 10270b57cec5SDimitry Andric } 10280b57cec5SDimitry Andric 10290b57cec5SDimitry Andric bool Scanner::consume(uint32_t Expected) { 1030480093f4SDimitry Andric if (Expected >= 0x80) { 10315ffd83dbSDimitry Andric setError("Cannot consume non-ascii characters", Current); 1032480093f4SDimitry Andric return false; 1033480093f4SDimitry Andric } 10340b57cec5SDimitry Andric if (Current == End) 10350b57cec5SDimitry Andric return false; 1036480093f4SDimitry Andric if (uint8_t(*Current) >= 0x80) { 10375ffd83dbSDimitry Andric setError("Cannot consume non-ascii characters", Current); 1038480093f4SDimitry Andric return false; 1039480093f4SDimitry Andric } 10400b57cec5SDimitry Andric if (uint8_t(*Current) == Expected) { 10410b57cec5SDimitry Andric ++Current; 10420b57cec5SDimitry Andric ++Column; 10430b57cec5SDimitry Andric return true; 10440b57cec5SDimitry Andric } 10450b57cec5SDimitry Andric return false; 10460b57cec5SDimitry Andric } 10470b57cec5SDimitry Andric 10480b57cec5SDimitry Andric void Scanner::skip(uint32_t Distance) { 10490b57cec5SDimitry Andric Current += Distance; 10500b57cec5SDimitry Andric Column += Distance; 10510b57cec5SDimitry Andric assert(Current <= End && "Skipped past the end"); 10520b57cec5SDimitry Andric } 10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric bool Scanner::isBlankOrBreak(StringRef::iterator Position) { 10550b57cec5SDimitry Andric if (Position == End) 10560b57cec5SDimitry Andric return false; 10570b57cec5SDimitry Andric return *Position == ' ' || *Position == '\t' || *Position == '\r' || 10580b57cec5SDimitry Andric *Position == '\n'; 10590b57cec5SDimitry Andric } 10600b57cec5SDimitry Andric 1061*5f757f3fSDimitry Andric bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) { 1062*5f757f3fSDimitry Andric if (Position == End || isBlankOrBreak(Position)) 1063*5f757f3fSDimitry Andric return false; 1064*5f757f3fSDimitry Andric if (FlowLevel && 1065*5f757f3fSDimitry Andric StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos) 1066*5f757f3fSDimitry Andric return false; 1067*5f757f3fSDimitry Andric return true; 1068*5f757f3fSDimitry Andric } 1069*5f757f3fSDimitry Andric 107081ad6265SDimitry Andric bool Scanner::isLineEmpty(StringRef Line) { 107181ad6265SDimitry Andric for (const auto *Position = Line.begin(); Position != Line.end(); ++Position) 107281ad6265SDimitry Andric if (!isBlankOrBreak(Position)) 107381ad6265SDimitry Andric return false; 107481ad6265SDimitry Andric return true; 107581ad6265SDimitry Andric } 107681ad6265SDimitry Andric 10770b57cec5SDimitry Andric bool Scanner::consumeLineBreakIfPresent() { 10780b57cec5SDimitry Andric auto Next = skip_b_break(Current); 10790b57cec5SDimitry Andric if (Next == Current) 10800b57cec5SDimitry Andric return false; 10810b57cec5SDimitry Andric Column = 0; 10820b57cec5SDimitry Andric ++Line; 10830b57cec5SDimitry Andric Current = Next; 10840b57cec5SDimitry Andric return true; 10850b57cec5SDimitry Andric } 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok 10880b57cec5SDimitry Andric , unsigned AtColumn 10890b57cec5SDimitry Andric , bool IsRequired) { 10900b57cec5SDimitry Andric if (IsSimpleKeyAllowed) { 10910b57cec5SDimitry Andric SimpleKey SK; 10920b57cec5SDimitry Andric SK.Tok = Tok; 10930b57cec5SDimitry Andric SK.Line = Line; 10940b57cec5SDimitry Andric SK.Column = AtColumn; 10950b57cec5SDimitry Andric SK.IsRequired = IsRequired; 10960b57cec5SDimitry Andric SK.FlowLevel = FlowLevel; 10970b57cec5SDimitry Andric SimpleKeys.push_back(SK); 10980b57cec5SDimitry Andric } 10990b57cec5SDimitry Andric } 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric void Scanner::removeStaleSimpleKeyCandidates() { 11020b57cec5SDimitry Andric for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); 11030b57cec5SDimitry Andric i != SimpleKeys.end();) { 11040b57cec5SDimitry Andric if (i->Line != Line || i->Column + 1024 < Column) { 11050b57cec5SDimitry Andric if (i->IsRequired) 11060b57cec5SDimitry Andric setError( "Could not find expected : for simple key" 11070b57cec5SDimitry Andric , i->Tok->Range.begin()); 11080b57cec5SDimitry Andric i = SimpleKeys.erase(i); 11090b57cec5SDimitry Andric } else 11100b57cec5SDimitry Andric ++i; 11110b57cec5SDimitry Andric } 11120b57cec5SDimitry Andric } 11130b57cec5SDimitry Andric 11140b57cec5SDimitry Andric void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { 11150b57cec5SDimitry Andric if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) 11160b57cec5SDimitry Andric SimpleKeys.pop_back(); 11170b57cec5SDimitry Andric } 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric bool Scanner::unrollIndent(int ToColumn) { 11200b57cec5SDimitry Andric Token T; 11210b57cec5SDimitry Andric // Indentation is ignored in flow. 11220b57cec5SDimitry Andric if (FlowLevel != 0) 11230b57cec5SDimitry Andric return true; 11240b57cec5SDimitry Andric 11250b57cec5SDimitry Andric while (Indent > ToColumn) { 11260b57cec5SDimitry Andric T.Kind = Token::TK_BlockEnd; 11270b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 11280b57cec5SDimitry Andric TokenQueue.push_back(T); 11290b57cec5SDimitry Andric Indent = Indents.pop_back_val(); 11300b57cec5SDimitry Andric } 11310b57cec5SDimitry Andric 11320b57cec5SDimitry Andric return true; 11330b57cec5SDimitry Andric } 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric bool Scanner::rollIndent( int ToColumn 11360b57cec5SDimitry Andric , Token::TokenKind Kind 11370b57cec5SDimitry Andric , TokenQueueT::iterator InsertPoint) { 11380b57cec5SDimitry Andric if (FlowLevel) 11390b57cec5SDimitry Andric return true; 11400b57cec5SDimitry Andric if (Indent < ToColumn) { 11410b57cec5SDimitry Andric Indents.push_back(Indent); 11420b57cec5SDimitry Andric Indent = ToColumn; 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric Token T; 11450b57cec5SDimitry Andric T.Kind = Kind; 11460b57cec5SDimitry Andric T.Range = StringRef(Current, 0); 11470b57cec5SDimitry Andric TokenQueue.insert(InsertPoint, T); 11480b57cec5SDimitry Andric } 11490b57cec5SDimitry Andric return true; 11500b57cec5SDimitry Andric } 11510b57cec5SDimitry Andric 11520b57cec5SDimitry Andric void Scanner::skipComment() { 1153e8d8bef9SDimitry Andric if (Current == End || *Current != '#') 11540b57cec5SDimitry Andric return; 11550b57cec5SDimitry Andric while (true) { 11560b57cec5SDimitry Andric // This may skip more than one byte, thus Column is only incremented 11570b57cec5SDimitry Andric // for code points. 11580b57cec5SDimitry Andric StringRef::iterator I = skip_nb_char(Current); 11590b57cec5SDimitry Andric if (I == Current) 11600b57cec5SDimitry Andric break; 11610b57cec5SDimitry Andric Current = I; 11620b57cec5SDimitry Andric ++Column; 11630b57cec5SDimitry Andric } 11640b57cec5SDimitry Andric } 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric void Scanner::scanToNextToken() { 11670b57cec5SDimitry Andric while (true) { 1168e8d8bef9SDimitry Andric while (Current != End && (*Current == ' ' || *Current == '\t')) { 11690b57cec5SDimitry Andric skip(1); 11700b57cec5SDimitry Andric } 11710b57cec5SDimitry Andric 11720b57cec5SDimitry Andric skipComment(); 11730b57cec5SDimitry Andric 11740b57cec5SDimitry Andric // Skip EOL. 11750b57cec5SDimitry Andric StringRef::iterator i = skip_b_break(Current); 11760b57cec5SDimitry Andric if (i == Current) 11770b57cec5SDimitry Andric break; 11780b57cec5SDimitry Andric Current = i; 11790b57cec5SDimitry Andric ++Line; 11800b57cec5SDimitry Andric Column = 0; 11810b57cec5SDimitry Andric // New lines may start a simple key. 11820b57cec5SDimitry Andric if (!FlowLevel) 11830b57cec5SDimitry Andric IsSimpleKeyAllowed = true; 11840b57cec5SDimitry Andric } 11850b57cec5SDimitry Andric } 11860b57cec5SDimitry Andric 11870b57cec5SDimitry Andric bool Scanner::scanStreamStart() { 11880b57cec5SDimitry Andric IsStartOfStream = false; 11890b57cec5SDimitry Andric 11900b57cec5SDimitry Andric EncodingInfo EI = getUnicodeEncoding(currentInput()); 11910b57cec5SDimitry Andric 11920b57cec5SDimitry Andric Token T; 11930b57cec5SDimitry Andric T.Kind = Token::TK_StreamStart; 11940b57cec5SDimitry Andric T.Range = StringRef(Current, EI.second); 11950b57cec5SDimitry Andric TokenQueue.push_back(T); 11960b57cec5SDimitry Andric Current += EI.second; 11970b57cec5SDimitry Andric return true; 11980b57cec5SDimitry Andric } 11990b57cec5SDimitry Andric 12000b57cec5SDimitry Andric bool Scanner::scanStreamEnd() { 12010b57cec5SDimitry Andric // Force an ending new line if one isn't present. 12020b57cec5SDimitry Andric if (Column != 0) { 12030b57cec5SDimitry Andric Column = 0; 12040b57cec5SDimitry Andric ++Line; 12050b57cec5SDimitry Andric } 12060b57cec5SDimitry Andric 12070b57cec5SDimitry Andric unrollIndent(-1); 12080b57cec5SDimitry Andric SimpleKeys.clear(); 12090b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1210*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 12110b57cec5SDimitry Andric 12120b57cec5SDimitry Andric Token T; 12130b57cec5SDimitry Andric T.Kind = Token::TK_StreamEnd; 12140b57cec5SDimitry Andric T.Range = StringRef(Current, 0); 12150b57cec5SDimitry Andric TokenQueue.push_back(T); 12160b57cec5SDimitry Andric return true; 12170b57cec5SDimitry Andric } 12180b57cec5SDimitry Andric 12190b57cec5SDimitry Andric bool Scanner::scanDirective() { 12200b57cec5SDimitry Andric // Reset the indentation level. 12210b57cec5SDimitry Andric unrollIndent(-1); 12220b57cec5SDimitry Andric SimpleKeys.clear(); 12230b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1224*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric StringRef::iterator Start = Current; 12270b57cec5SDimitry Andric consume('%'); 12280b57cec5SDimitry Andric StringRef::iterator NameStart = Current; 12290b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current); 12300b57cec5SDimitry Andric StringRef Name(NameStart, Current - NameStart); 12310b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_s_white, Current); 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric Token T; 12340b57cec5SDimitry Andric if (Name == "YAML") { 12350b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current); 12360b57cec5SDimitry Andric T.Kind = Token::TK_VersionDirective; 12370b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 12380b57cec5SDimitry Andric TokenQueue.push_back(T); 12390b57cec5SDimitry Andric return true; 12400b57cec5SDimitry Andric } else if(Name == "TAG") { 12410b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current); 12420b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_s_white, Current); 12430b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current); 12440b57cec5SDimitry Andric T.Kind = Token::TK_TagDirective; 12450b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 12460b57cec5SDimitry Andric TokenQueue.push_back(T); 12470b57cec5SDimitry Andric return true; 12480b57cec5SDimitry Andric } 12490b57cec5SDimitry Andric return false; 12500b57cec5SDimitry Andric } 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric bool Scanner::scanDocumentIndicator(bool IsStart) { 12530b57cec5SDimitry Andric unrollIndent(-1); 12540b57cec5SDimitry Andric SimpleKeys.clear(); 12550b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1256*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 12570b57cec5SDimitry Andric 12580b57cec5SDimitry Andric Token T; 12590b57cec5SDimitry Andric T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; 12600b57cec5SDimitry Andric T.Range = StringRef(Current, 3); 12610b57cec5SDimitry Andric skip(3); 12620b57cec5SDimitry Andric TokenQueue.push_back(T); 12630b57cec5SDimitry Andric return true; 12640b57cec5SDimitry Andric } 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric bool Scanner::scanFlowCollectionStart(bool IsSequence) { 12670b57cec5SDimitry Andric Token T; 12680b57cec5SDimitry Andric T.Kind = IsSequence ? Token::TK_FlowSequenceStart 12690b57cec5SDimitry Andric : Token::TK_FlowMappingStart; 12700b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 12710b57cec5SDimitry Andric skip(1); 12720b57cec5SDimitry Andric TokenQueue.push_back(T); 12730b57cec5SDimitry Andric 12740b57cec5SDimitry Andric // [ and { may begin a simple key. 12750b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); 12760b57cec5SDimitry Andric 12770b57cec5SDimitry Andric // And may also be followed by a simple key. 12780b57cec5SDimitry Andric IsSimpleKeyAllowed = true; 1279*5f757f3fSDimitry Andric // Adjacent values are allowed in flows only after JSON-style keys. 1280*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 12810b57cec5SDimitry Andric ++FlowLevel; 12820b57cec5SDimitry Andric return true; 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric bool Scanner::scanFlowCollectionEnd(bool IsSequence) { 12860b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 12870b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1288*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = true; 12890b57cec5SDimitry Andric Token T; 12900b57cec5SDimitry Andric T.Kind = IsSequence ? Token::TK_FlowSequenceEnd 12910b57cec5SDimitry Andric : Token::TK_FlowMappingEnd; 12920b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 12930b57cec5SDimitry Andric skip(1); 12940b57cec5SDimitry Andric TokenQueue.push_back(T); 12950b57cec5SDimitry Andric if (FlowLevel) 12960b57cec5SDimitry Andric --FlowLevel; 12970b57cec5SDimitry Andric return true; 12980b57cec5SDimitry Andric } 12990b57cec5SDimitry Andric 13000b57cec5SDimitry Andric bool Scanner::scanFlowEntry() { 13010b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 13020b57cec5SDimitry Andric IsSimpleKeyAllowed = true; 1303*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 13040b57cec5SDimitry Andric Token T; 13050b57cec5SDimitry Andric T.Kind = Token::TK_FlowEntry; 13060b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 13070b57cec5SDimitry Andric skip(1); 13080b57cec5SDimitry Andric TokenQueue.push_back(T); 13090b57cec5SDimitry Andric return true; 13100b57cec5SDimitry Andric } 13110b57cec5SDimitry Andric 13120b57cec5SDimitry Andric bool Scanner::scanBlockEntry() { 13130b57cec5SDimitry Andric rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); 13140b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 13150b57cec5SDimitry Andric IsSimpleKeyAllowed = true; 1316*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 13170b57cec5SDimitry Andric Token T; 13180b57cec5SDimitry Andric T.Kind = Token::TK_BlockEntry; 13190b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 13200b57cec5SDimitry Andric skip(1); 13210b57cec5SDimitry Andric TokenQueue.push_back(T); 13220b57cec5SDimitry Andric return true; 13230b57cec5SDimitry Andric } 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric bool Scanner::scanKey() { 13260b57cec5SDimitry Andric if (!FlowLevel) 13270b57cec5SDimitry Andric rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 13280b57cec5SDimitry Andric 13290b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 13300b57cec5SDimitry Andric IsSimpleKeyAllowed = !FlowLevel; 1331*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric Token T; 13340b57cec5SDimitry Andric T.Kind = Token::TK_Key; 13350b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 13360b57cec5SDimitry Andric skip(1); 13370b57cec5SDimitry Andric TokenQueue.push_back(T); 13380b57cec5SDimitry Andric return true; 13390b57cec5SDimitry Andric } 13400b57cec5SDimitry Andric 13410b57cec5SDimitry Andric bool Scanner::scanValue() { 13420b57cec5SDimitry Andric // If the previous token could have been a simple key, insert the key token 13430b57cec5SDimitry Andric // into the token queue. 13440b57cec5SDimitry Andric if (!SimpleKeys.empty()) { 13450b57cec5SDimitry Andric SimpleKey SK = SimpleKeys.pop_back_val(); 13460b57cec5SDimitry Andric Token T; 13470b57cec5SDimitry Andric T.Kind = Token::TK_Key; 13480b57cec5SDimitry Andric T.Range = SK.Tok->Range; 13490b57cec5SDimitry Andric TokenQueueT::iterator i, e; 13500b57cec5SDimitry Andric for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { 13510b57cec5SDimitry Andric if (i == SK.Tok) 13520b57cec5SDimitry Andric break; 13530b57cec5SDimitry Andric } 1354480093f4SDimitry Andric if (i == e) { 1355480093f4SDimitry Andric Failed = true; 1356480093f4SDimitry Andric return false; 1357480093f4SDimitry Andric } 13580b57cec5SDimitry Andric i = TokenQueue.insert(i, T); 13590b57cec5SDimitry Andric 13600b57cec5SDimitry Andric // We may also need to add a Block-Mapping-Start token. 13610b57cec5SDimitry Andric rollIndent(SK.Column, Token::TK_BlockMappingStart, i); 13620b57cec5SDimitry Andric 13630b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 13640b57cec5SDimitry Andric } else { 13650b57cec5SDimitry Andric if (!FlowLevel) 13660b57cec5SDimitry Andric rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 13670b57cec5SDimitry Andric IsSimpleKeyAllowed = !FlowLevel; 13680b57cec5SDimitry Andric } 1369*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 13700b57cec5SDimitry Andric 13710b57cec5SDimitry Andric Token T; 13720b57cec5SDimitry Andric T.Kind = Token::TK_Value; 13730b57cec5SDimitry Andric T.Range = StringRef(Current, 1); 13740b57cec5SDimitry Andric skip(1); 13750b57cec5SDimitry Andric TokenQueue.push_back(T); 13760b57cec5SDimitry Andric return true; 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric // Forbidding inlining improves performance by roughly 20%. 13800b57cec5SDimitry Andric // FIXME: Remove once llvm optimizes this to the faster version without hints. 13810b57cec5SDimitry Andric LLVM_ATTRIBUTE_NOINLINE static bool 13820b57cec5SDimitry Andric wasEscaped(StringRef::iterator First, StringRef::iterator Position); 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric // Returns whether a character at 'Position' was escaped with a leading '\'. 13850b57cec5SDimitry Andric // 'First' specifies the position of the first character in the string. 13860b57cec5SDimitry Andric static bool wasEscaped(StringRef::iterator First, 13870b57cec5SDimitry Andric StringRef::iterator Position) { 13880b57cec5SDimitry Andric assert(Position - 1 >= First); 13890b57cec5SDimitry Andric StringRef::iterator I = Position - 1; 13900b57cec5SDimitry Andric // We calculate the number of consecutive '\'s before the current position 13910b57cec5SDimitry Andric // by iterating backwards through our string. 13920b57cec5SDimitry Andric while (I >= First && *I == '\\') --I; 13930b57cec5SDimitry Andric // (Position - 1 - I) now contains the number of '\'s before the current 13940b57cec5SDimitry Andric // position. If it is odd, the character at 'Position' was escaped. 13950b57cec5SDimitry Andric return (Position - 1 - I) % 2 == 1; 13960b57cec5SDimitry Andric } 13970b57cec5SDimitry Andric 13980b57cec5SDimitry Andric bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { 13990b57cec5SDimitry Andric StringRef::iterator Start = Current; 14000b57cec5SDimitry Andric unsigned ColStart = Column; 14010b57cec5SDimitry Andric if (IsDoubleQuoted) { 14020b57cec5SDimitry Andric do { 14030b57cec5SDimitry Andric ++Current; 14040b57cec5SDimitry Andric while (Current != End && *Current != '"') 14050b57cec5SDimitry Andric ++Current; 14060b57cec5SDimitry Andric // Repeat until the previous character was not a '\' or was an escaped 14070b57cec5SDimitry Andric // backslash. 14080b57cec5SDimitry Andric } while ( Current != End 14090b57cec5SDimitry Andric && *(Current - 1) == '\\' 14100b57cec5SDimitry Andric && wasEscaped(Start + 1, Current)); 14110b57cec5SDimitry Andric } else { 14120b57cec5SDimitry Andric skip(1); 1413e8d8bef9SDimitry Andric while (Current != End) { 14140b57cec5SDimitry Andric // Skip a ' followed by another '. 14150b57cec5SDimitry Andric if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { 14160b57cec5SDimitry Andric skip(2); 14170b57cec5SDimitry Andric continue; 14180b57cec5SDimitry Andric } else if (*Current == '\'') 14190b57cec5SDimitry Andric break; 14200b57cec5SDimitry Andric StringRef::iterator i = skip_nb_char(Current); 14210b57cec5SDimitry Andric if (i == Current) { 14220b57cec5SDimitry Andric i = skip_b_break(Current); 14230b57cec5SDimitry Andric if (i == Current) 14240b57cec5SDimitry Andric break; 14250b57cec5SDimitry Andric Current = i; 14260b57cec5SDimitry Andric Column = 0; 14270b57cec5SDimitry Andric ++Line; 14280b57cec5SDimitry Andric } else { 14290b57cec5SDimitry Andric if (i == End) 14300b57cec5SDimitry Andric break; 14310b57cec5SDimitry Andric Current = i; 14320b57cec5SDimitry Andric ++Column; 14330b57cec5SDimitry Andric } 14340b57cec5SDimitry Andric } 14350b57cec5SDimitry Andric } 14360b57cec5SDimitry Andric 14370b57cec5SDimitry Andric if (Current == End) { 14380b57cec5SDimitry Andric setError("Expected quote at end of scalar", Current); 14390b57cec5SDimitry Andric return false; 14400b57cec5SDimitry Andric } 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andric skip(1); // Skip ending quote. 14430b57cec5SDimitry Andric Token T; 14440b57cec5SDimitry Andric T.Kind = Token::TK_Scalar; 14450b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 14460b57cec5SDimitry Andric TokenQueue.push_back(T); 14470b57cec5SDimitry Andric 14480b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); 14490b57cec5SDimitry Andric 14500b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1451*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = true; 14520b57cec5SDimitry Andric 14530b57cec5SDimitry Andric return true; 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric 14560b57cec5SDimitry Andric bool Scanner::scanPlainScalar() { 14570b57cec5SDimitry Andric StringRef::iterator Start = Current; 14580b57cec5SDimitry Andric unsigned ColStart = Column; 14590b57cec5SDimitry Andric unsigned LeadingBlanks = 0; 14600b57cec5SDimitry Andric assert(Indent >= -1 && "Indent must be >= -1 !"); 14610b57cec5SDimitry Andric unsigned indent = static_cast<unsigned>(Indent + 1); 1462e8d8bef9SDimitry Andric while (Current != End) { 14630b57cec5SDimitry Andric if (*Current == '#') 14640b57cec5SDimitry Andric break; 14650b57cec5SDimitry Andric 1466*5f757f3fSDimitry Andric while (Current != End && 1467*5f757f3fSDimitry Andric ((*Current != ':' && isPlainSafeNonBlank(Current)) || 1468*5f757f3fSDimitry Andric (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) { 14690b57cec5SDimitry Andric StringRef::iterator i = skip_nb_char(Current); 14700b57cec5SDimitry Andric if (i == Current) 14710b57cec5SDimitry Andric break; 14720b57cec5SDimitry Andric Current = i; 14730b57cec5SDimitry Andric ++Column; 14740b57cec5SDimitry Andric } 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric // Are we at the end? 14770b57cec5SDimitry Andric if (!isBlankOrBreak(Current)) 14780b57cec5SDimitry Andric break; 14790b57cec5SDimitry Andric 14800b57cec5SDimitry Andric // Eat blanks. 14810b57cec5SDimitry Andric StringRef::iterator Tmp = Current; 14820b57cec5SDimitry Andric while (isBlankOrBreak(Tmp)) { 14830b57cec5SDimitry Andric StringRef::iterator i = skip_s_white(Tmp); 14840b57cec5SDimitry Andric if (i != Tmp) { 14850b57cec5SDimitry Andric if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { 14860b57cec5SDimitry Andric setError("Found invalid tab character in indentation", Tmp); 14870b57cec5SDimitry Andric return false; 14880b57cec5SDimitry Andric } 14890b57cec5SDimitry Andric Tmp = i; 14900b57cec5SDimitry Andric ++Column; 14910b57cec5SDimitry Andric } else { 14920b57cec5SDimitry Andric i = skip_b_break(Tmp); 14930b57cec5SDimitry Andric if (!LeadingBlanks) 14940b57cec5SDimitry Andric LeadingBlanks = 1; 14950b57cec5SDimitry Andric Tmp = i; 14960b57cec5SDimitry Andric Column = 0; 14970b57cec5SDimitry Andric ++Line; 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric } 15000b57cec5SDimitry Andric 15010b57cec5SDimitry Andric if (!FlowLevel && Column < indent) 15020b57cec5SDimitry Andric break; 15030b57cec5SDimitry Andric 15040b57cec5SDimitry Andric Current = Tmp; 15050b57cec5SDimitry Andric } 15060b57cec5SDimitry Andric if (Start == Current) { 15070b57cec5SDimitry Andric setError("Got empty plain scalar", Start); 15080b57cec5SDimitry Andric return false; 15090b57cec5SDimitry Andric } 15100b57cec5SDimitry Andric Token T; 15110b57cec5SDimitry Andric T.Kind = Token::TK_Scalar; 15120b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 15130b57cec5SDimitry Andric TokenQueue.push_back(T); 15140b57cec5SDimitry Andric 15150b57cec5SDimitry Andric // Plain scalars can be simple keys. 15160b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); 15170b57cec5SDimitry Andric 15180b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1519*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 15200b57cec5SDimitry Andric 15210b57cec5SDimitry Andric return true; 15220b57cec5SDimitry Andric } 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric bool Scanner::scanAliasOrAnchor(bool IsAlias) { 15250b57cec5SDimitry Andric StringRef::iterator Start = Current; 15260b57cec5SDimitry Andric unsigned ColStart = Column; 15270b57cec5SDimitry Andric skip(1); 1528e8d8bef9SDimitry Andric while (Current != End) { 15290b57cec5SDimitry Andric if ( *Current == '[' || *Current == ']' 15300b57cec5SDimitry Andric || *Current == '{' || *Current == '}' 15310b57cec5SDimitry Andric || *Current == ',' 15320b57cec5SDimitry Andric || *Current == ':') 15330b57cec5SDimitry Andric break; 15340b57cec5SDimitry Andric StringRef::iterator i = skip_ns_char(Current); 15350b57cec5SDimitry Andric if (i == Current) 15360b57cec5SDimitry Andric break; 15370b57cec5SDimitry Andric Current = i; 15380b57cec5SDimitry Andric ++Column; 15390b57cec5SDimitry Andric } 15400b57cec5SDimitry Andric 1541e8d8bef9SDimitry Andric if (Start + 1 == Current) { 15420b57cec5SDimitry Andric setError("Got empty alias or anchor", Start); 15430b57cec5SDimitry Andric return false; 15440b57cec5SDimitry Andric } 15450b57cec5SDimitry Andric 15460b57cec5SDimitry Andric Token T; 15470b57cec5SDimitry Andric T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; 15480b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 15490b57cec5SDimitry Andric TokenQueue.push_back(T); 15500b57cec5SDimitry Andric 15510b57cec5SDimitry Andric // Alias and anchors can be simple keys. 15520b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1555*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 15560b57cec5SDimitry Andric 15570b57cec5SDimitry Andric return true; 15580b57cec5SDimitry Andric } 15590b57cec5SDimitry Andric 156081ad6265SDimitry Andric bool Scanner::scanBlockScalarIndicators(char &StyleIndicator, 156181ad6265SDimitry Andric char &ChompingIndicator, 156281ad6265SDimitry Andric unsigned &IndentIndicator, 156381ad6265SDimitry Andric bool &IsDone) { 156481ad6265SDimitry Andric StyleIndicator = scanBlockStyleIndicator(); 156581ad6265SDimitry Andric if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone)) 156681ad6265SDimitry Andric return false; 156781ad6265SDimitry Andric return true; 156881ad6265SDimitry Andric } 156981ad6265SDimitry Andric 157081ad6265SDimitry Andric char Scanner::scanBlockStyleIndicator() { 157181ad6265SDimitry Andric char Indicator = ' '; 157281ad6265SDimitry Andric if (Current != End && (*Current == '>' || *Current == '|')) { 157381ad6265SDimitry Andric Indicator = *Current; 157481ad6265SDimitry Andric skip(1); 157581ad6265SDimitry Andric } 157681ad6265SDimitry Andric return Indicator; 157781ad6265SDimitry Andric } 157881ad6265SDimitry Andric 15790b57cec5SDimitry Andric char Scanner::scanBlockChompingIndicator() { 15800b57cec5SDimitry Andric char Indicator = ' '; 15810b57cec5SDimitry Andric if (Current != End && (*Current == '+' || *Current == '-')) { 15820b57cec5SDimitry Andric Indicator = *Current; 15830b57cec5SDimitry Andric skip(1); 15840b57cec5SDimitry Andric } 15850b57cec5SDimitry Andric return Indicator; 15860b57cec5SDimitry Andric } 15870b57cec5SDimitry Andric 15880b57cec5SDimitry Andric /// Get the number of line breaks after chomping. 15890b57cec5SDimitry Andric /// 15900b57cec5SDimitry Andric /// Return the number of trailing line breaks to emit, depending on 15910b57cec5SDimitry Andric /// \p ChompingIndicator. 15920b57cec5SDimitry Andric static unsigned getChompedLineBreaks(char ChompingIndicator, 15930b57cec5SDimitry Andric unsigned LineBreaks, StringRef Str) { 15940b57cec5SDimitry Andric if (ChompingIndicator == '-') // Strip all line breaks. 15950b57cec5SDimitry Andric return 0; 15960b57cec5SDimitry Andric if (ChompingIndicator == '+') // Keep all line breaks. 15970b57cec5SDimitry Andric return LineBreaks; 15980b57cec5SDimitry Andric // Clip trailing lines. 15990b57cec5SDimitry Andric return Str.empty() ? 0 : 1; 16000b57cec5SDimitry Andric } 16010b57cec5SDimitry Andric 16020b57cec5SDimitry Andric unsigned Scanner::scanBlockIndentationIndicator() { 16030b57cec5SDimitry Andric unsigned Indent = 0; 16040b57cec5SDimitry Andric if (Current != End && (*Current >= '1' && *Current <= '9')) { 16050b57cec5SDimitry Andric Indent = unsigned(*Current - '0'); 16060b57cec5SDimitry Andric skip(1); 16070b57cec5SDimitry Andric } 16080b57cec5SDimitry Andric return Indent; 16090b57cec5SDimitry Andric } 16100b57cec5SDimitry Andric 16110b57cec5SDimitry Andric bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, 16120b57cec5SDimitry Andric unsigned &IndentIndicator, bool &IsDone) { 16130b57cec5SDimitry Andric auto Start = Current; 16140b57cec5SDimitry Andric 16150b57cec5SDimitry Andric ChompingIndicator = scanBlockChompingIndicator(); 16160b57cec5SDimitry Andric IndentIndicator = scanBlockIndentationIndicator(); 16170b57cec5SDimitry Andric // Check for the chomping indicator once again. 16180b57cec5SDimitry Andric if (ChompingIndicator == ' ') 16190b57cec5SDimitry Andric ChompingIndicator = scanBlockChompingIndicator(); 16200b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_s_white, Current); 16210b57cec5SDimitry Andric skipComment(); 16220b57cec5SDimitry Andric 16230b57cec5SDimitry Andric if (Current == End) { // EOF, we have an empty scalar. 16240b57cec5SDimitry Andric Token T; 16250b57cec5SDimitry Andric T.Kind = Token::TK_BlockScalar; 16260b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 16270b57cec5SDimitry Andric TokenQueue.push_back(T); 16280b57cec5SDimitry Andric IsDone = true; 16290b57cec5SDimitry Andric return true; 16300b57cec5SDimitry Andric } 16310b57cec5SDimitry Andric 16320b57cec5SDimitry Andric if (!consumeLineBreakIfPresent()) { 16330b57cec5SDimitry Andric setError("Expected a line break after block scalar header", Current); 16340b57cec5SDimitry Andric return false; 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric return true; 16370b57cec5SDimitry Andric } 16380b57cec5SDimitry Andric 16390b57cec5SDimitry Andric bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, 16400b57cec5SDimitry Andric unsigned BlockExitIndent, 16410b57cec5SDimitry Andric unsigned &LineBreaks, bool &IsDone) { 16420b57cec5SDimitry Andric unsigned MaxAllSpaceLineCharacters = 0; 16430b57cec5SDimitry Andric StringRef::iterator LongestAllSpaceLine; 16440b57cec5SDimitry Andric 16450b57cec5SDimitry Andric while (true) { 16460b57cec5SDimitry Andric advanceWhile(&Scanner::skip_s_space); 16470b57cec5SDimitry Andric if (skip_nb_char(Current) != Current) { 16480b57cec5SDimitry Andric // This line isn't empty, so try and find the indentation. 16490b57cec5SDimitry Andric if (Column <= BlockExitIndent) { // End of the block literal. 16500b57cec5SDimitry Andric IsDone = true; 16510b57cec5SDimitry Andric return true; 16520b57cec5SDimitry Andric } 16530b57cec5SDimitry Andric // We found the block's indentation. 16540b57cec5SDimitry Andric BlockIndent = Column; 16550b57cec5SDimitry Andric if (MaxAllSpaceLineCharacters > BlockIndent) { 16560b57cec5SDimitry Andric setError( 16570b57cec5SDimitry Andric "Leading all-spaces line must be smaller than the block indent", 16580b57cec5SDimitry Andric LongestAllSpaceLine); 16590b57cec5SDimitry Andric return false; 16600b57cec5SDimitry Andric } 16610b57cec5SDimitry Andric return true; 16620b57cec5SDimitry Andric } 16630b57cec5SDimitry Andric if (skip_b_break(Current) != Current && 16640b57cec5SDimitry Andric Column > MaxAllSpaceLineCharacters) { 16650b57cec5SDimitry Andric // Record the longest all-space line in case it's longer than the 16660b57cec5SDimitry Andric // discovered block indent. 16670b57cec5SDimitry Andric MaxAllSpaceLineCharacters = Column; 16680b57cec5SDimitry Andric LongestAllSpaceLine = Current; 16690b57cec5SDimitry Andric } 16700b57cec5SDimitry Andric 16710b57cec5SDimitry Andric // Check for EOF. 16720b57cec5SDimitry Andric if (Current == End) { 16730b57cec5SDimitry Andric IsDone = true; 16740b57cec5SDimitry Andric return true; 16750b57cec5SDimitry Andric } 16760b57cec5SDimitry Andric 16770b57cec5SDimitry Andric if (!consumeLineBreakIfPresent()) { 16780b57cec5SDimitry Andric IsDone = true; 16790b57cec5SDimitry Andric return true; 16800b57cec5SDimitry Andric } 16810b57cec5SDimitry Andric ++LineBreaks; 16820b57cec5SDimitry Andric } 16830b57cec5SDimitry Andric return true; 16840b57cec5SDimitry Andric } 16850b57cec5SDimitry Andric 16860b57cec5SDimitry Andric bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, 16870b57cec5SDimitry Andric unsigned BlockExitIndent, bool &IsDone) { 16880b57cec5SDimitry Andric // Skip the indentation. 16890b57cec5SDimitry Andric while (Column < BlockIndent) { 16900b57cec5SDimitry Andric auto I = skip_s_space(Current); 16910b57cec5SDimitry Andric if (I == Current) 16920b57cec5SDimitry Andric break; 16930b57cec5SDimitry Andric Current = I; 16940b57cec5SDimitry Andric ++Column; 16950b57cec5SDimitry Andric } 16960b57cec5SDimitry Andric 16970b57cec5SDimitry Andric if (skip_nb_char(Current) == Current) 16980b57cec5SDimitry Andric return true; 16990b57cec5SDimitry Andric 17000b57cec5SDimitry Andric if (Column <= BlockExitIndent) { // End of the block literal. 17010b57cec5SDimitry Andric IsDone = true; 17020b57cec5SDimitry Andric return true; 17030b57cec5SDimitry Andric } 17040b57cec5SDimitry Andric 17050b57cec5SDimitry Andric if (Column < BlockIndent) { 17060b57cec5SDimitry Andric if (Current != End && *Current == '#') { // Trailing comment. 17070b57cec5SDimitry Andric IsDone = true; 17080b57cec5SDimitry Andric return true; 17090b57cec5SDimitry Andric } 17100b57cec5SDimitry Andric setError("A text line is less indented than the block scalar", Current); 17110b57cec5SDimitry Andric return false; 17120b57cec5SDimitry Andric } 17130b57cec5SDimitry Andric return true; // A normal text line. 17140b57cec5SDimitry Andric } 17150b57cec5SDimitry Andric 17160b57cec5SDimitry Andric bool Scanner::scanBlockScalar(bool IsLiteral) { 17170b57cec5SDimitry Andric assert(*Current == '|' || *Current == '>'); 171881ad6265SDimitry Andric char StyleIndicator; 17190b57cec5SDimitry Andric char ChompingIndicator; 17200b57cec5SDimitry Andric unsigned BlockIndent; 17210b57cec5SDimitry Andric bool IsDone = false; 172281ad6265SDimitry Andric if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent, 172381ad6265SDimitry Andric IsDone)) 17240b57cec5SDimitry Andric return false; 17250b57cec5SDimitry Andric if (IsDone) 17260b57cec5SDimitry Andric return true; 172781ad6265SDimitry Andric bool IsFolded = StyleIndicator == '>'; 17280b57cec5SDimitry Andric 172981ad6265SDimitry Andric const auto *Start = Current; 17300b57cec5SDimitry Andric unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; 17310b57cec5SDimitry Andric unsigned LineBreaks = 0; 17320b57cec5SDimitry Andric if (BlockIndent == 0) { 17330b57cec5SDimitry Andric if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, 17340b57cec5SDimitry Andric IsDone)) 17350b57cec5SDimitry Andric return false; 17360b57cec5SDimitry Andric } 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric // Scan the block's scalars body. 17390b57cec5SDimitry Andric SmallString<256> Str; 17400b57cec5SDimitry Andric while (!IsDone) { 17410b57cec5SDimitry Andric if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) 17420b57cec5SDimitry Andric return false; 17430b57cec5SDimitry Andric if (IsDone) 17440b57cec5SDimitry Andric break; 17450b57cec5SDimitry Andric 17460b57cec5SDimitry Andric // Parse the current line. 17470b57cec5SDimitry Andric auto LineStart = Current; 17480b57cec5SDimitry Andric advanceWhile(&Scanner::skip_nb_char); 17490b57cec5SDimitry Andric if (LineStart != Current) { 175081ad6265SDimitry Andric if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) { 175181ad6265SDimitry Andric // The folded style "folds" any single line break between content into a 175281ad6265SDimitry Andric // single space, except when that content is "empty" (only contains 175381ad6265SDimitry Andric // whitespace) in which case the line break is left as-is. 175481ad6265SDimitry Andric if (LineBreaks == 1) { 175581ad6265SDimitry Andric Str.append(LineBreaks, 175681ad6265SDimitry Andric isLineEmpty(StringRef(LineStart, Current - LineStart)) 175781ad6265SDimitry Andric ? '\n' 175881ad6265SDimitry Andric : ' '); 175981ad6265SDimitry Andric } 176081ad6265SDimitry Andric // If we saw a single line break, we are completely replacing it and so 176181ad6265SDimitry Andric // want `LineBreaks == 0`. Otherwise this decrement accounts for the 176281ad6265SDimitry Andric // fact that the first line break is "trimmed", only being used to 176381ad6265SDimitry Andric // signal a sequence of line breaks which should not be folded. 176481ad6265SDimitry Andric LineBreaks--; 176581ad6265SDimitry Andric } 17660b57cec5SDimitry Andric Str.append(LineBreaks, '\n'); 17670b57cec5SDimitry Andric Str.append(StringRef(LineStart, Current - LineStart)); 17680b57cec5SDimitry Andric LineBreaks = 0; 17690b57cec5SDimitry Andric } 17700b57cec5SDimitry Andric 17710b57cec5SDimitry Andric // Check for EOF. 17720b57cec5SDimitry Andric if (Current == End) 17730b57cec5SDimitry Andric break; 17740b57cec5SDimitry Andric 17750b57cec5SDimitry Andric if (!consumeLineBreakIfPresent()) 17760b57cec5SDimitry Andric break; 17770b57cec5SDimitry Andric ++LineBreaks; 17780b57cec5SDimitry Andric } 17790b57cec5SDimitry Andric 17800b57cec5SDimitry Andric if (Current == End && !LineBreaks) 17810b57cec5SDimitry Andric // Ensure that there is at least one line break before the end of file. 17820b57cec5SDimitry Andric LineBreaks = 1; 17830b57cec5SDimitry Andric Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); 17840b57cec5SDimitry Andric 17850b57cec5SDimitry Andric // New lines may start a simple key. 17860b57cec5SDimitry Andric if (!FlowLevel) 17870b57cec5SDimitry Andric IsSimpleKeyAllowed = true; 1788*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 17890b57cec5SDimitry Andric 17900b57cec5SDimitry Andric Token T; 17910b57cec5SDimitry Andric T.Kind = Token::TK_BlockScalar; 17920b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 17935ffd83dbSDimitry Andric T.Value = std::string(Str); 17940b57cec5SDimitry Andric TokenQueue.push_back(T); 17950b57cec5SDimitry Andric return true; 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric 17980b57cec5SDimitry Andric bool Scanner::scanTag() { 17990b57cec5SDimitry Andric StringRef::iterator Start = Current; 18000b57cec5SDimitry Andric unsigned ColStart = Column; 18010b57cec5SDimitry Andric skip(1); // Eat !. 18020b57cec5SDimitry Andric if (Current == End || isBlankOrBreak(Current)); // An empty tag. 18030b57cec5SDimitry Andric else if (*Current == '<') { 18040b57cec5SDimitry Andric skip(1); 18050b57cec5SDimitry Andric scan_ns_uri_char(); 18060b57cec5SDimitry Andric if (!consume('>')) 18070b57cec5SDimitry Andric return false; 18080b57cec5SDimitry Andric } else { 18090b57cec5SDimitry Andric // FIXME: Actually parse the c-ns-shorthand-tag rule. 18100b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current); 18110b57cec5SDimitry Andric } 18120b57cec5SDimitry Andric 18130b57cec5SDimitry Andric Token T; 18140b57cec5SDimitry Andric T.Kind = Token::TK_Tag; 18150b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start); 18160b57cec5SDimitry Andric TokenQueue.push_back(T); 18170b57cec5SDimitry Andric 18180b57cec5SDimitry Andric // Tags can be simple keys. 18190b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); 18200b57cec5SDimitry Andric 18210b57cec5SDimitry Andric IsSimpleKeyAllowed = false; 1822*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false; 18230b57cec5SDimitry Andric 18240b57cec5SDimitry Andric return true; 18250b57cec5SDimitry Andric } 18260b57cec5SDimitry Andric 18270b57cec5SDimitry Andric bool Scanner::fetchMoreTokens() { 18280b57cec5SDimitry Andric if (IsStartOfStream) 18290b57cec5SDimitry Andric return scanStreamStart(); 18300b57cec5SDimitry Andric 18310b57cec5SDimitry Andric scanToNextToken(); 18320b57cec5SDimitry Andric 18330b57cec5SDimitry Andric if (Current == End) 18340b57cec5SDimitry Andric return scanStreamEnd(); 18350b57cec5SDimitry Andric 18360b57cec5SDimitry Andric removeStaleSimpleKeyCandidates(); 18370b57cec5SDimitry Andric 18380b57cec5SDimitry Andric unrollIndent(Column); 18390b57cec5SDimitry Andric 18400b57cec5SDimitry Andric if (Column == 0 && *Current == '%') 18410b57cec5SDimitry Andric return scanDirective(); 18420b57cec5SDimitry Andric 18430b57cec5SDimitry Andric if (Column == 0 && Current + 4 <= End 18440b57cec5SDimitry Andric && *Current == '-' 18450b57cec5SDimitry Andric && *(Current + 1) == '-' 18460b57cec5SDimitry Andric && *(Current + 2) == '-' 18470b57cec5SDimitry Andric && (Current + 3 == End || isBlankOrBreak(Current + 3))) 18480b57cec5SDimitry Andric return scanDocumentIndicator(true); 18490b57cec5SDimitry Andric 18500b57cec5SDimitry Andric if (Column == 0 && Current + 4 <= End 18510b57cec5SDimitry Andric && *Current == '.' 18520b57cec5SDimitry Andric && *(Current + 1) == '.' 18530b57cec5SDimitry Andric && *(Current + 2) == '.' 18540b57cec5SDimitry Andric && (Current + 3 == End || isBlankOrBreak(Current + 3))) 18550b57cec5SDimitry Andric return scanDocumentIndicator(false); 18560b57cec5SDimitry Andric 18570b57cec5SDimitry Andric if (*Current == '[') 18580b57cec5SDimitry Andric return scanFlowCollectionStart(true); 18590b57cec5SDimitry Andric 18600b57cec5SDimitry Andric if (*Current == '{') 18610b57cec5SDimitry Andric return scanFlowCollectionStart(false); 18620b57cec5SDimitry Andric 18630b57cec5SDimitry Andric if (*Current == ']') 18640b57cec5SDimitry Andric return scanFlowCollectionEnd(true); 18650b57cec5SDimitry Andric 18660b57cec5SDimitry Andric if (*Current == '}') 18670b57cec5SDimitry Andric return scanFlowCollectionEnd(false); 18680b57cec5SDimitry Andric 18690b57cec5SDimitry Andric if (*Current == ',') 18700b57cec5SDimitry Andric return scanFlowEntry(); 18710b57cec5SDimitry Andric 1872*5f757f3fSDimitry Andric if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End)) 18730b57cec5SDimitry Andric return scanBlockEntry(); 18740b57cec5SDimitry Andric 1875*5f757f3fSDimitry Andric if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1))) 18760b57cec5SDimitry Andric return scanKey(); 18770b57cec5SDimitry Andric 1878*5f757f3fSDimitry Andric if (*Current == ':' && 1879*5f757f3fSDimitry Andric (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow)) 18800b57cec5SDimitry Andric return scanValue(); 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric if (*Current == '*') 18830b57cec5SDimitry Andric return scanAliasOrAnchor(true); 18840b57cec5SDimitry Andric 18850b57cec5SDimitry Andric if (*Current == '&') 18860b57cec5SDimitry Andric return scanAliasOrAnchor(false); 18870b57cec5SDimitry Andric 18880b57cec5SDimitry Andric if (*Current == '!') 18890b57cec5SDimitry Andric return scanTag(); 18900b57cec5SDimitry Andric 18910b57cec5SDimitry Andric if (*Current == '|' && !FlowLevel) 18920b57cec5SDimitry Andric return scanBlockScalar(true); 18930b57cec5SDimitry Andric 18940b57cec5SDimitry Andric if (*Current == '>' && !FlowLevel) 18950b57cec5SDimitry Andric return scanBlockScalar(false); 18960b57cec5SDimitry Andric 18970b57cec5SDimitry Andric if (*Current == '\'') 18980b57cec5SDimitry Andric return scanFlowScalar(false); 18990b57cec5SDimitry Andric 19000b57cec5SDimitry Andric if (*Current == '"') 19010b57cec5SDimitry Andric return scanFlowScalar(true); 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric // Get a plain scalar. 19040b57cec5SDimitry Andric StringRef FirstChar(Current, 1); 1905*5f757f3fSDimitry Andric if ((!isBlankOrBreak(Current) && 1906*5f757f3fSDimitry Andric FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) || 1907*5f757f3fSDimitry Andric (FirstChar.find_first_of("?:-") != StringRef::npos && 1908*5f757f3fSDimitry Andric isPlainSafeNonBlank(Current + 1))) 19090b57cec5SDimitry Andric return scanPlainScalar(); 19100b57cec5SDimitry Andric 19115ffd83dbSDimitry Andric setError("Unrecognized character while tokenizing.", Current); 19120b57cec5SDimitry Andric return false; 19130b57cec5SDimitry Andric } 19140b57cec5SDimitry Andric 19150b57cec5SDimitry Andric Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, 19160b57cec5SDimitry Andric std::error_code *EC) 191781ad6265SDimitry Andric : scanner(new Scanner(Input, SM, ShowColors, EC)) {} 19180b57cec5SDimitry Andric 19190b57cec5SDimitry Andric Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, 19200b57cec5SDimitry Andric std::error_code *EC) 192181ad6265SDimitry Andric : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {} 19220b57cec5SDimitry Andric 19230b57cec5SDimitry Andric Stream::~Stream() = default; 19240b57cec5SDimitry Andric 19250b57cec5SDimitry Andric bool Stream::failed() { return scanner->failed(); } 19260b57cec5SDimitry Andric 1927e8d8bef9SDimitry Andric void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) { 1928e8d8bef9SDimitry Andric printError(N ? N->getSourceRange() : SMRange(), Msg, Kind); 1929e8d8bef9SDimitry Andric } 1930e8d8bef9SDimitry Andric 1931e8d8bef9SDimitry Andric void Stream::printError(const SMRange &Range, const Twine &Msg, 1932e8d8bef9SDimitry Andric SourceMgr::DiagKind Kind) { 1933e8d8bef9SDimitry Andric scanner->printError(Range.Start, Kind, Msg, Range); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric document_iterator Stream::begin() { 19370b57cec5SDimitry Andric if (CurrentDoc) 19380b57cec5SDimitry Andric report_fatal_error("Can only iterate over the stream once"); 19390b57cec5SDimitry Andric 19400b57cec5SDimitry Andric // Skip Stream-Start. 19410b57cec5SDimitry Andric scanner->getNext(); 19420b57cec5SDimitry Andric 19430b57cec5SDimitry Andric CurrentDoc.reset(new Document(*this)); 19440b57cec5SDimitry Andric return document_iterator(CurrentDoc); 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric document_iterator Stream::end() { 19480b57cec5SDimitry Andric return document_iterator(); 19490b57cec5SDimitry Andric } 19500b57cec5SDimitry Andric 19510b57cec5SDimitry Andric void Stream::skip() { 19520eae32dcSDimitry Andric for (Document &Doc : *this) 19530eae32dcSDimitry Andric Doc.skip(); 19540b57cec5SDimitry Andric } 19550b57cec5SDimitry Andric 19560b57cec5SDimitry Andric Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, 19570b57cec5SDimitry Andric StringRef T) 19580b57cec5SDimitry Andric : Doc(D), TypeID(Type), Anchor(A), Tag(T) { 19590b57cec5SDimitry Andric SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); 19600b57cec5SDimitry Andric SourceRange = SMRange(Start, Start); 19610b57cec5SDimitry Andric } 19620b57cec5SDimitry Andric 19630b57cec5SDimitry Andric std::string Node::getVerbatimTag() const { 19640b57cec5SDimitry Andric StringRef Raw = getRawTag(); 19650b57cec5SDimitry Andric if (!Raw.empty() && Raw != "!") { 19660b57cec5SDimitry Andric std::string Ret; 19670b57cec5SDimitry Andric if (Raw.find_last_of('!') == 0) { 19685ffd83dbSDimitry Andric Ret = std::string(Doc->getTagMap().find("!")->second); 19690b57cec5SDimitry Andric Ret += Raw.substr(1); 19700b57cec5SDimitry Andric return Ret; 1971*5f757f3fSDimitry Andric } else if (Raw.starts_with("!!")) { 19725ffd83dbSDimitry Andric Ret = std::string(Doc->getTagMap().find("!!")->second); 19730b57cec5SDimitry Andric Ret += Raw.substr(2); 19740b57cec5SDimitry Andric return Ret; 19750b57cec5SDimitry Andric } else { 19760b57cec5SDimitry Andric StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); 19770b57cec5SDimitry Andric std::map<StringRef, StringRef>::const_iterator It = 19780b57cec5SDimitry Andric Doc->getTagMap().find(TagHandle); 19790b57cec5SDimitry Andric if (It != Doc->getTagMap().end()) 19805ffd83dbSDimitry Andric Ret = std::string(It->second); 19810b57cec5SDimitry Andric else { 19820b57cec5SDimitry Andric Token T; 19830b57cec5SDimitry Andric T.Kind = Token::TK_Tag; 19840b57cec5SDimitry Andric T.Range = TagHandle; 19850b57cec5SDimitry Andric setError(Twine("Unknown tag handle ") + TagHandle, T); 19860b57cec5SDimitry Andric } 19870b57cec5SDimitry Andric Ret += Raw.substr(Raw.find_last_of('!') + 1); 19880b57cec5SDimitry Andric return Ret; 19890b57cec5SDimitry Andric } 19900b57cec5SDimitry Andric } 19910b57cec5SDimitry Andric 19920b57cec5SDimitry Andric switch (getType()) { 19930b57cec5SDimitry Andric case NK_Null: 19940b57cec5SDimitry Andric return "tag:yaml.org,2002:null"; 19950b57cec5SDimitry Andric case NK_Scalar: 19960b57cec5SDimitry Andric case NK_BlockScalar: 19970b57cec5SDimitry Andric // TODO: Tag resolution. 19980b57cec5SDimitry Andric return "tag:yaml.org,2002:str"; 19990b57cec5SDimitry Andric case NK_Mapping: 20000b57cec5SDimitry Andric return "tag:yaml.org,2002:map"; 20010b57cec5SDimitry Andric case NK_Sequence: 20020b57cec5SDimitry Andric return "tag:yaml.org,2002:seq"; 20030b57cec5SDimitry Andric } 20040b57cec5SDimitry Andric 20050b57cec5SDimitry Andric return ""; 20060b57cec5SDimitry Andric } 20070b57cec5SDimitry Andric 20080b57cec5SDimitry Andric Token &Node::peekNext() { 20090b57cec5SDimitry Andric return Doc->peekNext(); 20100b57cec5SDimitry Andric } 20110b57cec5SDimitry Andric 20120b57cec5SDimitry Andric Token Node::getNext() { 20130b57cec5SDimitry Andric return Doc->getNext(); 20140b57cec5SDimitry Andric } 20150b57cec5SDimitry Andric 20160b57cec5SDimitry Andric Node *Node::parseBlockNode() { 20170b57cec5SDimitry Andric return Doc->parseBlockNode(); 20180b57cec5SDimitry Andric } 20190b57cec5SDimitry Andric 20200b57cec5SDimitry Andric BumpPtrAllocator &Node::getAllocator() { 20210b57cec5SDimitry Andric return Doc->NodeAllocator; 20220b57cec5SDimitry Andric } 20230b57cec5SDimitry Andric 20240b57cec5SDimitry Andric void Node::setError(const Twine &Msg, Token &Tok) const { 20250b57cec5SDimitry Andric Doc->setError(Msg, Tok); 20260b57cec5SDimitry Andric } 20270b57cec5SDimitry Andric 20280b57cec5SDimitry Andric bool Node::failed() const { 20290b57cec5SDimitry Andric return Doc->failed(); 20300b57cec5SDimitry Andric } 20310b57cec5SDimitry Andric 20320b57cec5SDimitry Andric StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { 2033*5f757f3fSDimitry Andric if (Value[0] == '"') 2034*5f757f3fSDimitry Andric return getDoubleQuotedValue(Value, Storage); 2035*5f757f3fSDimitry Andric if (Value[0] == '\'') 2036*5f757f3fSDimitry Andric return getSingleQuotedValue(Value, Storage); 2037*5f757f3fSDimitry Andric return getPlainValue(Value, Storage); 2038*5f757f3fSDimitry Andric } 2039*5f757f3fSDimitry Andric 2040*5f757f3fSDimitry Andric /// parseScalarValue - A common parsing routine for all flow scalar styles. 2041*5f757f3fSDimitry Andric /// It handles line break characters by itself, adds regular content characters 2042*5f757f3fSDimitry Andric /// to the result, and forwards escaped sequences to the provided routine for 2043*5f757f3fSDimitry Andric /// the style-specific processing. 2044*5f757f3fSDimitry Andric /// 2045*5f757f3fSDimitry Andric /// \param UnquotedValue - An input value without quotation marks. 2046*5f757f3fSDimitry Andric /// \param Storage - A storage for the result if the input value is multiline or 2047*5f757f3fSDimitry Andric /// contains escaped characters. 2048*5f757f3fSDimitry Andric /// \param LookupChars - A set of special characters to search in the input 2049*5f757f3fSDimitry Andric /// string. Should include line break characters and the escape character 2050*5f757f3fSDimitry Andric /// specific for the processing scalar style, if any. 2051*5f757f3fSDimitry Andric /// \param UnescapeCallback - This is called when the escape character is found 2052*5f757f3fSDimitry Andric /// in the input. 2053*5f757f3fSDimitry Andric /// \returns - The unfolded and unescaped value. 2054*5f757f3fSDimitry Andric static StringRef 2055*5f757f3fSDimitry Andric parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage, 2056*5f757f3fSDimitry Andric StringRef LookupChars, 2057*5f757f3fSDimitry Andric std::function<StringRef(StringRef, SmallVectorImpl<char> &)> 2058*5f757f3fSDimitry Andric UnescapeCallback) { 2059*5f757f3fSDimitry Andric size_t I = UnquotedValue.find_first_of(LookupChars); 2060*5f757f3fSDimitry Andric if (I == StringRef::npos) 20610b57cec5SDimitry Andric return UnquotedValue; 2062*5f757f3fSDimitry Andric 20630b57cec5SDimitry Andric Storage.clear(); 20640b57cec5SDimitry Andric Storage.reserve(UnquotedValue.size()); 2065*5f757f3fSDimitry Andric char LastNewLineAddedAs = '\0'; 2066*5f757f3fSDimitry Andric for (; I != StringRef::npos; I = UnquotedValue.find_first_of(LookupChars)) { 2067*5f757f3fSDimitry Andric if (UnquotedValue[I] != '\r' && UnquotedValue[I] != '\n') { 2068*5f757f3fSDimitry Andric llvm::append_range(Storage, UnquotedValue.take_front(I)); 2069*5f757f3fSDimitry Andric UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage); 2070*5f757f3fSDimitry Andric LastNewLineAddedAs = '\0'; 2071*5f757f3fSDimitry Andric continue; 2072*5f757f3fSDimitry Andric } 2073*5f757f3fSDimitry Andric if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(" \t", I); 2074*5f757f3fSDimitry Andric LastNonSWhite != StringRef::npos) { 2075*5f757f3fSDimitry Andric llvm::append_range(Storage, UnquotedValue.take_front(LastNonSWhite + 1)); 2076*5f757f3fSDimitry Andric Storage.push_back(' '); 2077*5f757f3fSDimitry Andric LastNewLineAddedAs = ' '; 2078*5f757f3fSDimitry Andric } else { 2079*5f757f3fSDimitry Andric // Note: we can't just check if the last character in Storage is ' ', 2080*5f757f3fSDimitry Andric // '\n', or something else; that would give a wrong result for double 2081*5f757f3fSDimitry Andric // quoted values containing an escaped space character before a new-line 2082*5f757f3fSDimitry Andric // character. 2083*5f757f3fSDimitry Andric switch (LastNewLineAddedAs) { 2084*5f757f3fSDimitry Andric case ' ': 2085*5f757f3fSDimitry Andric assert(!Storage.empty() && Storage.back() == ' '); 2086*5f757f3fSDimitry Andric Storage.back() = '\n'; 2087*5f757f3fSDimitry Andric LastNewLineAddedAs = '\n'; 2088*5f757f3fSDimitry Andric break; 2089*5f757f3fSDimitry Andric case '\n': 2090*5f757f3fSDimitry Andric assert(!Storage.empty() && Storage.back() == '\n'); 2091*5f757f3fSDimitry Andric Storage.push_back('\n'); 2092*5f757f3fSDimitry Andric break; 2093*5f757f3fSDimitry Andric default: 2094*5f757f3fSDimitry Andric Storage.push_back(' '); 2095*5f757f3fSDimitry Andric LastNewLineAddedAs = ' '; 2096*5f757f3fSDimitry Andric break; 2097*5f757f3fSDimitry Andric } 2098*5f757f3fSDimitry Andric } 2099*5f757f3fSDimitry Andric // Handle Windows-style EOL 2100*5f757f3fSDimitry Andric if (UnquotedValue.substr(I, 2) == "\r\n") 2101*5f757f3fSDimitry Andric I++; 2102*5f757f3fSDimitry Andric UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim(" \t"); 21030b57cec5SDimitry Andric } 2104e8d8bef9SDimitry Andric llvm::append_range(Storage, UnquotedValue); 21050b57cec5SDimitry Andric return StringRef(Storage.begin(), Storage.size()); 21060b57cec5SDimitry Andric } 21070b57cec5SDimitry Andric 2108*5f757f3fSDimitry Andric StringRef 2109*5f757f3fSDimitry Andric ScalarNode::getDoubleQuotedValue(StringRef RawValue, 2110*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) const { 2111*5f757f3fSDimitry Andric assert(RawValue.size() >= 2 && RawValue.front() == '"' && 2112*5f757f3fSDimitry Andric RawValue.back() == '"'); 2113*5f757f3fSDimitry Andric StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2); 21140b57cec5SDimitry Andric 2115*5f757f3fSDimitry Andric auto UnescapeFunc = [this](StringRef UnquotedValue, 2116*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) { 2117*5f757f3fSDimitry Andric assert(UnquotedValue.take_front(1) == "\\"); 2118480093f4SDimitry Andric if (UnquotedValue.size() == 1) { 2119480093f4SDimitry Andric Token T; 2120*5f757f3fSDimitry Andric T.Range = UnquotedValue; 2121480093f4SDimitry Andric setError("Unrecognized escape code", T); 2122*5f757f3fSDimitry Andric Storage.clear(); 2123*5f757f3fSDimitry Andric return StringRef(); 2124480093f4SDimitry Andric } 2125*5f757f3fSDimitry Andric UnquotedValue = UnquotedValue.drop_front(1); 21260b57cec5SDimitry Andric switch (UnquotedValue[0]) { 21270b57cec5SDimitry Andric default: { 21280b57cec5SDimitry Andric Token T; 2129*5f757f3fSDimitry Andric T.Range = UnquotedValue.take_front(1); 2130480093f4SDimitry Andric setError("Unrecognized escape code", T); 2131*5f757f3fSDimitry Andric Storage.clear(); 2132*5f757f3fSDimitry Andric return StringRef(); 21330b57cec5SDimitry Andric } 21340b57cec5SDimitry Andric case '\r': 2135*5f757f3fSDimitry Andric // Shrink the Windows-style EOL. 2136*5f757f3fSDimitry Andric if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\n') 2137*5f757f3fSDimitry Andric UnquotedValue = UnquotedValue.drop_front(1); 2138*5f757f3fSDimitry Andric [[fallthrough]]; 21390b57cec5SDimitry Andric case '\n': 2140*5f757f3fSDimitry Andric return UnquotedValue.drop_front(1).ltrim(" \t"); 21410b57cec5SDimitry Andric case '0': 21420b57cec5SDimitry Andric Storage.push_back(0x00); 21430b57cec5SDimitry Andric break; 21440b57cec5SDimitry Andric case 'a': 21450b57cec5SDimitry Andric Storage.push_back(0x07); 21460b57cec5SDimitry Andric break; 21470b57cec5SDimitry Andric case 'b': 21480b57cec5SDimitry Andric Storage.push_back(0x08); 21490b57cec5SDimitry Andric break; 21500b57cec5SDimitry Andric case 't': 21510b57cec5SDimitry Andric case 0x09: 21520b57cec5SDimitry Andric Storage.push_back(0x09); 21530b57cec5SDimitry Andric break; 21540b57cec5SDimitry Andric case 'n': 21550b57cec5SDimitry Andric Storage.push_back(0x0A); 21560b57cec5SDimitry Andric break; 21570b57cec5SDimitry Andric case 'v': 21580b57cec5SDimitry Andric Storage.push_back(0x0B); 21590b57cec5SDimitry Andric break; 21600b57cec5SDimitry Andric case 'f': 21610b57cec5SDimitry Andric Storage.push_back(0x0C); 21620b57cec5SDimitry Andric break; 21630b57cec5SDimitry Andric case 'r': 21640b57cec5SDimitry Andric Storage.push_back(0x0D); 21650b57cec5SDimitry Andric break; 21660b57cec5SDimitry Andric case 'e': 21670b57cec5SDimitry Andric Storage.push_back(0x1B); 21680b57cec5SDimitry Andric break; 21690b57cec5SDimitry Andric case ' ': 21700b57cec5SDimitry Andric Storage.push_back(0x20); 21710b57cec5SDimitry Andric break; 21720b57cec5SDimitry Andric case '"': 21730b57cec5SDimitry Andric Storage.push_back(0x22); 21740b57cec5SDimitry Andric break; 21750b57cec5SDimitry Andric case '/': 21760b57cec5SDimitry Andric Storage.push_back(0x2F); 21770b57cec5SDimitry Andric break; 21780b57cec5SDimitry Andric case '\\': 21790b57cec5SDimitry Andric Storage.push_back(0x5C); 21800b57cec5SDimitry Andric break; 21810b57cec5SDimitry Andric case 'N': 21820b57cec5SDimitry Andric encodeUTF8(0x85, Storage); 21830b57cec5SDimitry Andric break; 21840b57cec5SDimitry Andric case '_': 21850b57cec5SDimitry Andric encodeUTF8(0xA0, Storage); 21860b57cec5SDimitry Andric break; 21870b57cec5SDimitry Andric case 'L': 21880b57cec5SDimitry Andric encodeUTF8(0x2028, Storage); 21890b57cec5SDimitry Andric break; 21900b57cec5SDimitry Andric case 'P': 21910b57cec5SDimitry Andric encodeUTF8(0x2029, Storage); 21920b57cec5SDimitry Andric break; 21930b57cec5SDimitry Andric case 'x': { 21940b57cec5SDimitry Andric if (UnquotedValue.size() < 3) 21950b57cec5SDimitry Andric // TODO: Report error. 21960b57cec5SDimitry Andric break; 21970b57cec5SDimitry Andric unsigned int UnicodeScalarValue; 21980b57cec5SDimitry Andric if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) 21990b57cec5SDimitry Andric // TODO: Report error. 22000b57cec5SDimitry Andric UnicodeScalarValue = 0xFFFD; 22010b57cec5SDimitry Andric encodeUTF8(UnicodeScalarValue, Storage); 2202*5f757f3fSDimitry Andric return UnquotedValue.drop_front(3); 22030b57cec5SDimitry Andric } 22040b57cec5SDimitry Andric case 'u': { 22050b57cec5SDimitry Andric if (UnquotedValue.size() < 5) 22060b57cec5SDimitry Andric // TODO: Report error. 22070b57cec5SDimitry Andric break; 22080b57cec5SDimitry Andric unsigned int UnicodeScalarValue; 22090b57cec5SDimitry Andric if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) 22100b57cec5SDimitry Andric // TODO: Report error. 22110b57cec5SDimitry Andric UnicodeScalarValue = 0xFFFD; 22120b57cec5SDimitry Andric encodeUTF8(UnicodeScalarValue, Storage); 2213*5f757f3fSDimitry Andric return UnquotedValue.drop_front(5); 22140b57cec5SDimitry Andric } 22150b57cec5SDimitry Andric case 'U': { 22160b57cec5SDimitry Andric if (UnquotedValue.size() < 9) 22170b57cec5SDimitry Andric // TODO: Report error. 22180b57cec5SDimitry Andric break; 22190b57cec5SDimitry Andric unsigned int UnicodeScalarValue; 22200b57cec5SDimitry Andric if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) 22210b57cec5SDimitry Andric // TODO: Report error. 22220b57cec5SDimitry Andric UnicodeScalarValue = 0xFFFD; 22230b57cec5SDimitry Andric encodeUTF8(UnicodeScalarValue, Storage); 2224*5f757f3fSDimitry Andric return UnquotedValue.drop_front(9); 22250b57cec5SDimitry Andric } 22260b57cec5SDimitry Andric } 2227*5f757f3fSDimitry Andric return UnquotedValue.drop_front(1); 2228*5f757f3fSDimitry Andric }; 2229*5f757f3fSDimitry Andric 2230*5f757f3fSDimitry Andric return parseScalarValue(UnquotedValue, Storage, "\\\r\n", UnescapeFunc); 22310b57cec5SDimitry Andric } 2232*5f757f3fSDimitry Andric 2233*5f757f3fSDimitry Andric StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue, 2234*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) { 2235*5f757f3fSDimitry Andric assert(RawValue.size() >= 2 && RawValue.front() == '\'' && 2236*5f757f3fSDimitry Andric RawValue.back() == '\''); 2237*5f757f3fSDimitry Andric StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2); 2238*5f757f3fSDimitry Andric 2239*5f757f3fSDimitry Andric auto UnescapeFunc = [](StringRef UnquotedValue, 2240*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) { 2241*5f757f3fSDimitry Andric assert(UnquotedValue.take_front(2) == "''"); 2242*5f757f3fSDimitry Andric Storage.push_back('\''); 2243*5f757f3fSDimitry Andric return UnquotedValue.drop_front(2); 2244*5f757f3fSDimitry Andric }; 2245*5f757f3fSDimitry Andric 2246*5f757f3fSDimitry Andric return parseScalarValue(UnquotedValue, Storage, "'\r\n", UnescapeFunc); 22470b57cec5SDimitry Andric } 2248*5f757f3fSDimitry Andric 2249*5f757f3fSDimitry Andric StringRef ScalarNode::getPlainValue(StringRef RawValue, 2250*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) { 2251*5f757f3fSDimitry Andric // Trim trailing whitespace ('b-char' and 's-white'). 2252*5f757f3fSDimitry Andric // NOTE: Alternatively we could change the scanner to not include whitespace 2253*5f757f3fSDimitry Andric // here in the first place. 2254*5f757f3fSDimitry Andric RawValue = RawValue.rtrim("\r\n \t"); 2255*5f757f3fSDimitry Andric return parseScalarValue(RawValue, Storage, "\r\n", nullptr); 22560b57cec5SDimitry Andric } 22570b57cec5SDimitry Andric 22580b57cec5SDimitry Andric Node *KeyValueNode::getKey() { 22590b57cec5SDimitry Andric if (Key) 22600b57cec5SDimitry Andric return Key; 22610b57cec5SDimitry Andric // Handle implicit null keys. 22620b57cec5SDimitry Andric { 22630b57cec5SDimitry Andric Token &t = peekNext(); 22640b57cec5SDimitry Andric if ( t.Kind == Token::TK_BlockEnd 22650b57cec5SDimitry Andric || t.Kind == Token::TK_Value 22660b57cec5SDimitry Andric || t.Kind == Token::TK_Error) { 22670b57cec5SDimitry Andric return Key = new (getAllocator()) NullNode(Doc); 22680b57cec5SDimitry Andric } 22690b57cec5SDimitry Andric if (t.Kind == Token::TK_Key) 22700b57cec5SDimitry Andric getNext(); // skip TK_Key. 22710b57cec5SDimitry Andric } 22720b57cec5SDimitry Andric 22730b57cec5SDimitry Andric // Handle explicit null keys. 22740b57cec5SDimitry Andric Token &t = peekNext(); 22750b57cec5SDimitry Andric if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { 22760b57cec5SDimitry Andric return Key = new (getAllocator()) NullNode(Doc); 22770b57cec5SDimitry Andric } 22780b57cec5SDimitry Andric 22790b57cec5SDimitry Andric // We've got a normal key. 22800b57cec5SDimitry Andric return Key = parseBlockNode(); 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric 22830b57cec5SDimitry Andric Node *KeyValueNode::getValue() { 22840b57cec5SDimitry Andric if (Value) 22850b57cec5SDimitry Andric return Value; 2286480093f4SDimitry Andric 2287480093f4SDimitry Andric if (Node* Key = getKey()) 2288480093f4SDimitry Andric Key->skip(); 2289480093f4SDimitry Andric else { 2290480093f4SDimitry Andric setError("Null key in Key Value.", peekNext()); 2291480093f4SDimitry Andric return Value = new (getAllocator()) NullNode(Doc); 2292480093f4SDimitry Andric } 2293480093f4SDimitry Andric 22940b57cec5SDimitry Andric if (failed()) 22950b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc); 22960b57cec5SDimitry Andric 22970b57cec5SDimitry Andric // Handle implicit null values. 22980b57cec5SDimitry Andric { 22990b57cec5SDimitry Andric Token &t = peekNext(); 23000b57cec5SDimitry Andric if ( t.Kind == Token::TK_BlockEnd 23010b57cec5SDimitry Andric || t.Kind == Token::TK_FlowMappingEnd 23020b57cec5SDimitry Andric || t.Kind == Token::TK_Key 23030b57cec5SDimitry Andric || t.Kind == Token::TK_FlowEntry 23040b57cec5SDimitry Andric || t.Kind == Token::TK_Error) { 23050b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc); 23060b57cec5SDimitry Andric } 23070b57cec5SDimitry Andric 23080b57cec5SDimitry Andric if (t.Kind != Token::TK_Value) { 23090b57cec5SDimitry Andric setError("Unexpected token in Key Value.", t); 23100b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc); 23110b57cec5SDimitry Andric } 23120b57cec5SDimitry Andric getNext(); // skip TK_Value. 23130b57cec5SDimitry Andric } 23140b57cec5SDimitry Andric 23150b57cec5SDimitry Andric // Handle explicit null values. 23160b57cec5SDimitry Andric Token &t = peekNext(); 23170b57cec5SDimitry Andric if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { 23180b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc); 23190b57cec5SDimitry Andric } 23200b57cec5SDimitry Andric 23210b57cec5SDimitry Andric // We got a normal value. 23220b57cec5SDimitry Andric return Value = parseBlockNode(); 23230b57cec5SDimitry Andric } 23240b57cec5SDimitry Andric 23250b57cec5SDimitry Andric void MappingNode::increment() { 23260b57cec5SDimitry Andric if (failed()) { 23270b57cec5SDimitry Andric IsAtEnd = true; 23280b57cec5SDimitry Andric CurrentEntry = nullptr; 23290b57cec5SDimitry Andric return; 23300b57cec5SDimitry Andric } 23310b57cec5SDimitry Andric if (CurrentEntry) { 23320b57cec5SDimitry Andric CurrentEntry->skip(); 23330b57cec5SDimitry Andric if (Type == MT_Inline) { 23340b57cec5SDimitry Andric IsAtEnd = true; 23350b57cec5SDimitry Andric CurrentEntry = nullptr; 23360b57cec5SDimitry Andric return; 23370b57cec5SDimitry Andric } 23380b57cec5SDimitry Andric } 23390b57cec5SDimitry Andric Token T = peekNext(); 23400b57cec5SDimitry Andric if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { 23410b57cec5SDimitry Andric // KeyValueNode eats the TK_Key. That way it can detect null keys. 23420b57cec5SDimitry Andric CurrentEntry = new (getAllocator()) KeyValueNode(Doc); 23430b57cec5SDimitry Andric } else if (Type == MT_Block) { 23440b57cec5SDimitry Andric switch (T.Kind) { 23450b57cec5SDimitry Andric case Token::TK_BlockEnd: 23460b57cec5SDimitry Andric getNext(); 23470b57cec5SDimitry Andric IsAtEnd = true; 23480b57cec5SDimitry Andric CurrentEntry = nullptr; 23490b57cec5SDimitry Andric break; 23500b57cec5SDimitry Andric default: 23510b57cec5SDimitry Andric setError("Unexpected token. Expected Key or Block End", T); 2352bdd1243dSDimitry Andric [[fallthrough]]; 23530b57cec5SDimitry Andric case Token::TK_Error: 23540b57cec5SDimitry Andric IsAtEnd = true; 23550b57cec5SDimitry Andric CurrentEntry = nullptr; 23560b57cec5SDimitry Andric } 23570b57cec5SDimitry Andric } else { 23580b57cec5SDimitry Andric switch (T.Kind) { 23590b57cec5SDimitry Andric case Token::TK_FlowEntry: 23600b57cec5SDimitry Andric // Eat the flow entry and recurse. 23610b57cec5SDimitry Andric getNext(); 23620b57cec5SDimitry Andric return increment(); 23630b57cec5SDimitry Andric case Token::TK_FlowMappingEnd: 23640b57cec5SDimitry Andric getNext(); 2365bdd1243dSDimitry Andric [[fallthrough]]; 23660b57cec5SDimitry Andric case Token::TK_Error: 23670b57cec5SDimitry Andric // Set this to end iterator. 23680b57cec5SDimitry Andric IsAtEnd = true; 23690b57cec5SDimitry Andric CurrentEntry = nullptr; 23700b57cec5SDimitry Andric break; 23710b57cec5SDimitry Andric default: 23720b57cec5SDimitry Andric setError( "Unexpected token. Expected Key, Flow Entry, or Flow " 23730b57cec5SDimitry Andric "Mapping End." 23740b57cec5SDimitry Andric , T); 23750b57cec5SDimitry Andric IsAtEnd = true; 23760b57cec5SDimitry Andric CurrentEntry = nullptr; 23770b57cec5SDimitry Andric } 23780b57cec5SDimitry Andric } 23790b57cec5SDimitry Andric } 23800b57cec5SDimitry Andric 23810b57cec5SDimitry Andric void SequenceNode::increment() { 23820b57cec5SDimitry Andric if (failed()) { 23830b57cec5SDimitry Andric IsAtEnd = true; 23840b57cec5SDimitry Andric CurrentEntry = nullptr; 23850b57cec5SDimitry Andric return; 23860b57cec5SDimitry Andric } 23870b57cec5SDimitry Andric if (CurrentEntry) 23880b57cec5SDimitry Andric CurrentEntry->skip(); 23890b57cec5SDimitry Andric Token T = peekNext(); 23900b57cec5SDimitry Andric if (SeqType == ST_Block) { 23910b57cec5SDimitry Andric switch (T.Kind) { 23920b57cec5SDimitry Andric case Token::TK_BlockEntry: 23930b57cec5SDimitry Andric getNext(); 23940b57cec5SDimitry Andric CurrentEntry = parseBlockNode(); 23950b57cec5SDimitry Andric if (!CurrentEntry) { // An error occurred. 23960b57cec5SDimitry Andric IsAtEnd = true; 23970b57cec5SDimitry Andric CurrentEntry = nullptr; 23980b57cec5SDimitry Andric } 23990b57cec5SDimitry Andric break; 24000b57cec5SDimitry Andric case Token::TK_BlockEnd: 24010b57cec5SDimitry Andric getNext(); 24020b57cec5SDimitry Andric IsAtEnd = true; 24030b57cec5SDimitry Andric CurrentEntry = nullptr; 24040b57cec5SDimitry Andric break; 24050b57cec5SDimitry Andric default: 24060b57cec5SDimitry Andric setError( "Unexpected token. Expected Block Entry or Block End." 24070b57cec5SDimitry Andric , T); 2408bdd1243dSDimitry Andric [[fallthrough]]; 24090b57cec5SDimitry Andric case Token::TK_Error: 24100b57cec5SDimitry Andric IsAtEnd = true; 24110b57cec5SDimitry Andric CurrentEntry = nullptr; 24120b57cec5SDimitry Andric } 24130b57cec5SDimitry Andric } else if (SeqType == ST_Indentless) { 24140b57cec5SDimitry Andric switch (T.Kind) { 24150b57cec5SDimitry Andric case Token::TK_BlockEntry: 24160b57cec5SDimitry Andric getNext(); 24170b57cec5SDimitry Andric CurrentEntry = parseBlockNode(); 24180b57cec5SDimitry Andric if (!CurrentEntry) { // An error occurred. 24190b57cec5SDimitry Andric IsAtEnd = true; 24200b57cec5SDimitry Andric CurrentEntry = nullptr; 24210b57cec5SDimitry Andric } 24220b57cec5SDimitry Andric break; 24230b57cec5SDimitry Andric default: 24240b57cec5SDimitry Andric case Token::TK_Error: 24250b57cec5SDimitry Andric IsAtEnd = true; 24260b57cec5SDimitry Andric CurrentEntry = nullptr; 24270b57cec5SDimitry Andric } 24280b57cec5SDimitry Andric } else if (SeqType == ST_Flow) { 24290b57cec5SDimitry Andric switch (T.Kind) { 24300b57cec5SDimitry Andric case Token::TK_FlowEntry: 24310b57cec5SDimitry Andric // Eat the flow entry and recurse. 24320b57cec5SDimitry Andric getNext(); 24330b57cec5SDimitry Andric WasPreviousTokenFlowEntry = true; 24340b57cec5SDimitry Andric return increment(); 24350b57cec5SDimitry Andric case Token::TK_FlowSequenceEnd: 24360b57cec5SDimitry Andric getNext(); 2437bdd1243dSDimitry Andric [[fallthrough]]; 24380b57cec5SDimitry Andric case Token::TK_Error: 24390b57cec5SDimitry Andric // Set this to end iterator. 24400b57cec5SDimitry Andric IsAtEnd = true; 24410b57cec5SDimitry Andric CurrentEntry = nullptr; 24420b57cec5SDimitry Andric break; 24430b57cec5SDimitry Andric case Token::TK_StreamEnd: 24440b57cec5SDimitry Andric case Token::TK_DocumentEnd: 24450b57cec5SDimitry Andric case Token::TK_DocumentStart: 24460b57cec5SDimitry Andric setError("Could not find closing ]!", T); 24470b57cec5SDimitry Andric // Set this to end iterator. 24480b57cec5SDimitry Andric IsAtEnd = true; 24490b57cec5SDimitry Andric CurrentEntry = nullptr; 24500b57cec5SDimitry Andric break; 24510b57cec5SDimitry Andric default: 24520b57cec5SDimitry Andric if (!WasPreviousTokenFlowEntry) { 24530b57cec5SDimitry Andric setError("Expected , between entries!", T); 24540b57cec5SDimitry Andric IsAtEnd = true; 24550b57cec5SDimitry Andric CurrentEntry = nullptr; 24560b57cec5SDimitry Andric break; 24570b57cec5SDimitry Andric } 24580b57cec5SDimitry Andric // Otherwise it must be a flow entry. 24590b57cec5SDimitry Andric CurrentEntry = parseBlockNode(); 24600b57cec5SDimitry Andric if (!CurrentEntry) { 24610b57cec5SDimitry Andric IsAtEnd = true; 24620b57cec5SDimitry Andric } 24630b57cec5SDimitry Andric WasPreviousTokenFlowEntry = false; 24640b57cec5SDimitry Andric break; 24650b57cec5SDimitry Andric } 24660b57cec5SDimitry Andric } 24670b57cec5SDimitry Andric } 24680b57cec5SDimitry Andric 24690b57cec5SDimitry Andric Document::Document(Stream &S) : stream(S), Root(nullptr) { 24700b57cec5SDimitry Andric // Tag maps starts with two default mappings. 24710b57cec5SDimitry Andric TagMap["!"] = "!"; 24720b57cec5SDimitry Andric TagMap["!!"] = "tag:yaml.org,2002:"; 24730b57cec5SDimitry Andric 24740b57cec5SDimitry Andric if (parseDirectives()) 24750b57cec5SDimitry Andric expectToken(Token::TK_DocumentStart); 24760b57cec5SDimitry Andric Token &T = peekNext(); 24770b57cec5SDimitry Andric if (T.Kind == Token::TK_DocumentStart) 24780b57cec5SDimitry Andric getNext(); 24790b57cec5SDimitry Andric } 24800b57cec5SDimitry Andric 24810b57cec5SDimitry Andric bool Document::skip() { 24820b57cec5SDimitry Andric if (stream.scanner->failed()) 24830b57cec5SDimitry Andric return false; 2484480093f4SDimitry Andric if (!Root && !getRoot()) 2485480093f4SDimitry Andric return false; 24860b57cec5SDimitry Andric Root->skip(); 24870b57cec5SDimitry Andric Token &T = peekNext(); 24880b57cec5SDimitry Andric if (T.Kind == Token::TK_StreamEnd) 24890b57cec5SDimitry Andric return false; 24900b57cec5SDimitry Andric if (T.Kind == Token::TK_DocumentEnd) { 24910b57cec5SDimitry Andric getNext(); 24920b57cec5SDimitry Andric return skip(); 24930b57cec5SDimitry Andric } 24940b57cec5SDimitry Andric return true; 24950b57cec5SDimitry Andric } 24960b57cec5SDimitry Andric 24970b57cec5SDimitry Andric Token &Document::peekNext() { 24980b57cec5SDimitry Andric return stream.scanner->peekNext(); 24990b57cec5SDimitry Andric } 25000b57cec5SDimitry Andric 25010b57cec5SDimitry Andric Token Document::getNext() { 25020b57cec5SDimitry Andric return stream.scanner->getNext(); 25030b57cec5SDimitry Andric } 25040b57cec5SDimitry Andric 25050b57cec5SDimitry Andric void Document::setError(const Twine &Message, Token &Location) const { 25060b57cec5SDimitry Andric stream.scanner->setError(Message, Location.Range.begin()); 25070b57cec5SDimitry Andric } 25080b57cec5SDimitry Andric 25090b57cec5SDimitry Andric bool Document::failed() const { 25100b57cec5SDimitry Andric return stream.scanner->failed(); 25110b57cec5SDimitry Andric } 25120b57cec5SDimitry Andric 25130b57cec5SDimitry Andric Node *Document::parseBlockNode() { 25140b57cec5SDimitry Andric Token T = peekNext(); 25150b57cec5SDimitry Andric // Handle properties. 25160b57cec5SDimitry Andric Token AnchorInfo; 25170b57cec5SDimitry Andric Token TagInfo; 25180b57cec5SDimitry Andric parse_property: 25190b57cec5SDimitry Andric switch (T.Kind) { 25200b57cec5SDimitry Andric case Token::TK_Alias: 25210b57cec5SDimitry Andric getNext(); 25220b57cec5SDimitry Andric return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); 25230b57cec5SDimitry Andric case Token::TK_Anchor: 25240b57cec5SDimitry Andric if (AnchorInfo.Kind == Token::TK_Anchor) { 25250b57cec5SDimitry Andric setError("Already encountered an anchor for this node!", T); 25260b57cec5SDimitry Andric return nullptr; 25270b57cec5SDimitry Andric } 25280b57cec5SDimitry Andric AnchorInfo = getNext(); // Consume TK_Anchor. 25290b57cec5SDimitry Andric T = peekNext(); 25300b57cec5SDimitry Andric goto parse_property; 25310b57cec5SDimitry Andric case Token::TK_Tag: 25320b57cec5SDimitry Andric if (TagInfo.Kind == Token::TK_Tag) { 25330b57cec5SDimitry Andric setError("Already encountered a tag for this node!", T); 25340b57cec5SDimitry Andric return nullptr; 25350b57cec5SDimitry Andric } 25360b57cec5SDimitry Andric TagInfo = getNext(); // Consume TK_Tag. 25370b57cec5SDimitry Andric T = peekNext(); 25380b57cec5SDimitry Andric goto parse_property; 25390b57cec5SDimitry Andric default: 25400b57cec5SDimitry Andric break; 25410b57cec5SDimitry Andric } 25420b57cec5SDimitry Andric 25430b57cec5SDimitry Andric switch (T.Kind) { 25440b57cec5SDimitry Andric case Token::TK_BlockEntry: 25450b57cec5SDimitry Andric // We got an unindented BlockEntry sequence. This is not terminated with 25460b57cec5SDimitry Andric // a BlockEnd. 25470b57cec5SDimitry Andric // Don't eat the TK_BlockEntry, SequenceNode needs it. 25480b57cec5SDimitry Andric return new (NodeAllocator) SequenceNode( stream.CurrentDoc 25490b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 25500b57cec5SDimitry Andric , TagInfo.Range 25510b57cec5SDimitry Andric , SequenceNode::ST_Indentless); 25520b57cec5SDimitry Andric case Token::TK_BlockSequenceStart: 25530b57cec5SDimitry Andric getNext(); 25540b57cec5SDimitry Andric return new (NodeAllocator) 25550b57cec5SDimitry Andric SequenceNode( stream.CurrentDoc 25560b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 25570b57cec5SDimitry Andric , TagInfo.Range 25580b57cec5SDimitry Andric , SequenceNode::ST_Block); 25590b57cec5SDimitry Andric case Token::TK_BlockMappingStart: 25600b57cec5SDimitry Andric getNext(); 25610b57cec5SDimitry Andric return new (NodeAllocator) 25620b57cec5SDimitry Andric MappingNode( stream.CurrentDoc 25630b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 25640b57cec5SDimitry Andric , TagInfo.Range 25650b57cec5SDimitry Andric , MappingNode::MT_Block); 25660b57cec5SDimitry Andric case Token::TK_FlowSequenceStart: 25670b57cec5SDimitry Andric getNext(); 25680b57cec5SDimitry Andric return new (NodeAllocator) 25690b57cec5SDimitry Andric SequenceNode( stream.CurrentDoc 25700b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 25710b57cec5SDimitry Andric , TagInfo.Range 25720b57cec5SDimitry Andric , SequenceNode::ST_Flow); 25730b57cec5SDimitry Andric case Token::TK_FlowMappingStart: 25740b57cec5SDimitry Andric getNext(); 25750b57cec5SDimitry Andric return new (NodeAllocator) 25760b57cec5SDimitry Andric MappingNode( stream.CurrentDoc 25770b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 25780b57cec5SDimitry Andric , TagInfo.Range 25790b57cec5SDimitry Andric , MappingNode::MT_Flow); 25800b57cec5SDimitry Andric case Token::TK_Scalar: 25810b57cec5SDimitry Andric getNext(); 25820b57cec5SDimitry Andric return new (NodeAllocator) 25830b57cec5SDimitry Andric ScalarNode( stream.CurrentDoc 25840b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 25850b57cec5SDimitry Andric , TagInfo.Range 25860b57cec5SDimitry Andric , T.Range); 25870b57cec5SDimitry Andric case Token::TK_BlockScalar: { 25880b57cec5SDimitry Andric getNext(); 25890b57cec5SDimitry Andric StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); 25900b57cec5SDimitry Andric StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); 25910b57cec5SDimitry Andric return new (NodeAllocator) 25920b57cec5SDimitry Andric BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), 25930b57cec5SDimitry Andric TagInfo.Range, StrCopy, T.Range); 25940b57cec5SDimitry Andric } 25950b57cec5SDimitry Andric case Token::TK_Key: 25960b57cec5SDimitry Andric // Don't eat the TK_Key, KeyValueNode expects it. 25970b57cec5SDimitry Andric return new (NodeAllocator) 25980b57cec5SDimitry Andric MappingNode( stream.CurrentDoc 25990b57cec5SDimitry Andric , AnchorInfo.Range.substr(1) 26000b57cec5SDimitry Andric , TagInfo.Range 26010b57cec5SDimitry Andric , MappingNode::MT_Inline); 26020b57cec5SDimitry Andric case Token::TK_DocumentStart: 26030b57cec5SDimitry Andric case Token::TK_DocumentEnd: 26040b57cec5SDimitry Andric case Token::TK_StreamEnd: 26050b57cec5SDimitry Andric default: 26060b57cec5SDimitry Andric // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not 26070b57cec5SDimitry Andric // !!null null. 26080b57cec5SDimitry Andric return new (NodeAllocator) NullNode(stream.CurrentDoc); 2609480093f4SDimitry Andric case Token::TK_FlowMappingEnd: 2610480093f4SDimitry Andric case Token::TK_FlowSequenceEnd: 2611480093f4SDimitry Andric case Token::TK_FlowEntry: { 2612480093f4SDimitry Andric if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root))) 2613480093f4SDimitry Andric return new (NodeAllocator) NullNode(stream.CurrentDoc); 2614480093f4SDimitry Andric 2615480093f4SDimitry Andric setError("Unexpected token", T); 2616480093f4SDimitry Andric return nullptr; 2617480093f4SDimitry Andric } 26180b57cec5SDimitry Andric case Token::TK_Error: 26190b57cec5SDimitry Andric return nullptr; 26200b57cec5SDimitry Andric } 26210b57cec5SDimitry Andric llvm_unreachable("Control flow shouldn't reach here."); 26220b57cec5SDimitry Andric return nullptr; 26230b57cec5SDimitry Andric } 26240b57cec5SDimitry Andric 26250b57cec5SDimitry Andric bool Document::parseDirectives() { 26260b57cec5SDimitry Andric bool isDirective = false; 26270b57cec5SDimitry Andric while (true) { 26280b57cec5SDimitry Andric Token T = peekNext(); 26290b57cec5SDimitry Andric if (T.Kind == Token::TK_TagDirective) { 26300b57cec5SDimitry Andric parseTAGDirective(); 26310b57cec5SDimitry Andric isDirective = true; 26320b57cec5SDimitry Andric } else if (T.Kind == Token::TK_VersionDirective) { 26330b57cec5SDimitry Andric parseYAMLDirective(); 26340b57cec5SDimitry Andric isDirective = true; 26350b57cec5SDimitry Andric } else 26360b57cec5SDimitry Andric break; 26370b57cec5SDimitry Andric } 26380b57cec5SDimitry Andric return isDirective; 26390b57cec5SDimitry Andric } 26400b57cec5SDimitry Andric 26410b57cec5SDimitry Andric void Document::parseYAMLDirective() { 26420b57cec5SDimitry Andric getNext(); // Eat %YAML <version> 26430b57cec5SDimitry Andric } 26440b57cec5SDimitry Andric 26450b57cec5SDimitry Andric void Document::parseTAGDirective() { 26460b57cec5SDimitry Andric Token Tag = getNext(); // %TAG <handle> <prefix> 26470b57cec5SDimitry Andric StringRef T = Tag.Range; 26480b57cec5SDimitry Andric // Strip %TAG 26490b57cec5SDimitry Andric T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); 26500b57cec5SDimitry Andric std::size_t HandleEnd = T.find_first_of(" \t"); 26510b57cec5SDimitry Andric StringRef TagHandle = T.substr(0, HandleEnd); 26520b57cec5SDimitry Andric StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); 26530b57cec5SDimitry Andric TagMap[TagHandle] = TagPrefix; 26540b57cec5SDimitry Andric } 26550b57cec5SDimitry Andric 26560b57cec5SDimitry Andric bool Document::expectToken(int TK) { 26570b57cec5SDimitry Andric Token T = getNext(); 26580b57cec5SDimitry Andric if (T.Kind != TK) { 26590b57cec5SDimitry Andric setError("Unexpected token", T); 26600b57cec5SDimitry Andric return false; 26610b57cec5SDimitry Andric } 26620b57cec5SDimitry Andric return true; 26630b57cec5SDimitry Andric } 2664