//===- TokenRewriter.cpp - Token-based code rewriting interface -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the TokenRewriter class, which is used for code
// transformations.
//
//===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
#include "clang/Rewrite/Core/TokenRewriter.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/ScratchBuffer.h"
#include "clang/Lex/Token.h"
#include <cassert>
#include <cstring>
#include <map>
#include <memory>
#include <utility>
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric using namespace clang;
250b57cec5SDimitry Andric
TokenRewriter(FileID FID,SourceManager & SM,const LangOptions & LangOpts)260b57cec5SDimitry Andric TokenRewriter::TokenRewriter(FileID FID, SourceManager &SM,
270b57cec5SDimitry Andric const LangOptions &LangOpts) {
280b57cec5SDimitry Andric ScratchBuf.reset(new ScratchBuffer(SM));
290b57cec5SDimitry Andric
300b57cec5SDimitry Andric // Create a lexer to lex all the tokens of the main file in raw mode.
31*e8d8bef9SDimitry Andric llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
320b57cec5SDimitry Andric Lexer RawLex(FID, FromFile, SM, LangOpts);
330b57cec5SDimitry Andric
340b57cec5SDimitry Andric // Return all comments and whitespace as tokens.
350b57cec5SDimitry Andric RawLex.SetKeepWhitespaceMode(true);
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric // Lex the file, populating our datastructures.
380b57cec5SDimitry Andric Token RawTok;
390b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawTok);
400b57cec5SDimitry Andric while (RawTok.isNot(tok::eof)) {
410b57cec5SDimitry Andric #if 0
420b57cec5SDimitry Andric if (Tok.is(tok::raw_identifier)) {
430b57cec5SDimitry Andric // Look up the identifier info for the token. This should use
440b57cec5SDimitry Andric // IdentifierTable directly instead of PP.
450b57cec5SDimitry Andric PP.LookUpIdentifierInfo(Tok);
460b57cec5SDimitry Andric }
470b57cec5SDimitry Andric #endif
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric AddToken(RawTok, TokenList.end());
500b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawTok);
510b57cec5SDimitry Andric }
520b57cec5SDimitry Andric }
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric TokenRewriter::~TokenRewriter() = default;
550b57cec5SDimitry Andric
560b57cec5SDimitry Andric /// RemapIterator - Convert from token_iterator (a const iterator) to
570b57cec5SDimitry Andric /// TokenRefTy (a non-const iterator).
RemapIterator(token_iterator I)580b57cec5SDimitry Andric TokenRewriter::TokenRefTy TokenRewriter::RemapIterator(token_iterator I) {
590b57cec5SDimitry Andric if (I == token_end()) return TokenList.end();
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric // FIXME: This is horrible, we should use our own list or something to avoid
620b57cec5SDimitry Andric // this.
630b57cec5SDimitry Andric std::map<SourceLocation, TokenRefTy>::iterator MapIt =
640b57cec5SDimitry Andric TokenAtLoc.find(I->getLocation());
650b57cec5SDimitry Andric assert(MapIt != TokenAtLoc.end() && "iterator not in rewriter?");
660b57cec5SDimitry Andric return MapIt->second;
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric /// AddToken - Add the specified token into the Rewriter before the other
700b57cec5SDimitry Andric /// position.
710b57cec5SDimitry Andric TokenRewriter::TokenRefTy
AddToken(const Token & T,TokenRefTy Where)720b57cec5SDimitry Andric TokenRewriter::AddToken(const Token &T, TokenRefTy Where) {
730b57cec5SDimitry Andric Where = TokenList.insert(Where, T);
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric bool InsertSuccess = TokenAtLoc.insert(std::make_pair(T.getLocation(),
760b57cec5SDimitry Andric Where)).second;
770b57cec5SDimitry Andric assert(InsertSuccess && "Token location already in rewriter!");
780b57cec5SDimitry Andric (void)InsertSuccess;
790b57cec5SDimitry Andric return Where;
800b57cec5SDimitry Andric }
810b57cec5SDimitry Andric
820b57cec5SDimitry Andric TokenRewriter::token_iterator
AddTokenBefore(token_iterator I,const char * Val)830b57cec5SDimitry Andric TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) {
840b57cec5SDimitry Andric unsigned Len = strlen(Val);
850b57cec5SDimitry Andric
860b57cec5SDimitry Andric // Plop the string into the scratch buffer, then create a token for this
870b57cec5SDimitry Andric // string.
880b57cec5SDimitry Andric Token Tok;
890b57cec5SDimitry Andric Tok.startToken();
900b57cec5SDimitry Andric const char *Spelling;
910b57cec5SDimitry Andric Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling));
920b57cec5SDimitry Andric Tok.setLength(Len);
930b57cec5SDimitry Andric
940b57cec5SDimitry Andric // TODO: Form a whole lexer around this and relex the token! For now, just
950b57cec5SDimitry Andric // set kind to tok::unknown.
960b57cec5SDimitry Andric Tok.setKind(tok::unknown);
970b57cec5SDimitry Andric
980b57cec5SDimitry Andric return AddToken(Tok, RemapIterator(I));
990b57cec5SDimitry Andric }
100