1 //===- RewriteRope.h - Rope specialized for rewriter ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the RewriteRope class, which is a powerful string class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H 14 #define LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H 15 16 #include "llvm/ADT/IntrusiveRefCntPtr.h" 17 #include "llvm/ADT/StringRef.h" 18 #include <cassert> 19 #include <cstddef> 20 #include <iterator> 21 #include <utility> 22 23 namespace clang { 24 25 //===--------------------------------------------------------------------===// 26 // RopeRefCountString Class 27 //===--------------------------------------------------------------------===// 28 29 /// RopeRefCountString - This struct is allocated with 'new char[]' from the 30 /// heap, and represents a reference counted chunk of string data. When its 31 /// ref count drops to zero, it is delete[]'d. This is primarily managed 32 /// through the RopePiece class below. 33 struct RopeRefCountString { 34 unsigned RefCount; 35 char Data[1]; // Variable sized. 36 RetainRopeRefCountString37 void Retain() { ++RefCount; } 38 ReleaseRopeRefCountString39 void Release() { 40 assert(RefCount > 0 && "Reference count is already zero."); 41 if (--RefCount == 0) 42 delete [] (char*)this; 43 } 44 }; 45 46 //===--------------------------------------------------------------------===// 47 // RopePiece Class 48 //===--------------------------------------------------------------------===// 49 50 /// RopePiece - This class represents a view into a RopeRefCountString object. 51 /// This allows references to string data to be efficiently chopped up and 52 /// moved around without having to push around the string data itself. 53 /// 54 /// For example, we could have a 1M RopePiece and want to insert something 55 /// into the middle of it. To do this, we split it into two RopePiece objects 56 /// that both refer to the same underlying RopeRefCountString (just with 57 /// different offsets) which is a nice constant time operation. 58 struct RopePiece { 59 llvm::IntrusiveRefCntPtr<RopeRefCountString> StrData; 60 unsigned StartOffs = 0; 61 unsigned EndOffs = 0; 62 63 RopePiece() = default; RopePieceRopePiece64 RopePiece(llvm::IntrusiveRefCntPtr<RopeRefCountString> Str, unsigned Start, 65 unsigned End) 66 : StrData(std::move(Str)), StartOffs(Start), EndOffs(End) {} 67 68 const char &operator[](unsigned Offset) const { 69 return StrData->Data[Offset+StartOffs]; 70 } 71 char &operator[](unsigned Offset) { 72 return StrData->Data[Offset+StartOffs]; 73 } 74 sizeRopePiece75 unsigned size() const { return EndOffs-StartOffs; } 76 }; 77 78 //===--------------------------------------------------------------------===// 79 // RopePieceBTreeIterator Class 80 //===--------------------------------------------------------------------===// 81 82 /// RopePieceBTreeIterator - This class provides read-only forward iteration 83 /// over bytes that are in a RopePieceBTree. This first iterates over bytes 84 /// in a RopePiece, then iterates over RopePiece's in a RopePieceBTreeLeaf, 85 /// then iterates over RopePieceBTreeLeaf's in a RopePieceBTree. 86 class RopePieceBTreeIterator { 87 /// CurNode - The current B+Tree node that we are inspecting. 88 const void /*RopePieceBTreeLeaf*/ *CurNode = nullptr; 89 90 /// CurPiece - The current RopePiece in the B+Tree node that we're 91 /// inspecting. 92 const RopePiece *CurPiece = nullptr; 93 94 /// CurChar - The current byte in the RopePiece we are pointing to. 95 unsigned CurChar = 0; 96 97 public: 98 using iterator_category = std::forward_iterator_tag; 99 using value_type = const char; 100 using difference_type = std::ptrdiff_t; 101 using pointer = value_type *; 102 using reference = value_type &; 103 104 RopePieceBTreeIterator() = default; 105 RopePieceBTreeIterator(const void /*RopePieceBTreeNode*/ *N); 106 107 char operator*() const { 108 return (*CurPiece)[CurChar]; 109 } 110 111 bool operator==(const RopePieceBTreeIterator &RHS) const { 112 return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar; 113 } 114 bool operator!=(const RopePieceBTreeIterator &RHS) const { 115 return !operator==(RHS); 116 } 117 118 RopePieceBTreeIterator& operator++() { // Preincrement 119 if (CurChar+1 < CurPiece->size()) 120 ++CurChar; 121 else 122 MoveToNextPiece(); 123 return *this; 124 } 125 126 RopePieceBTreeIterator operator++(int) { // Postincrement 127 RopePieceBTreeIterator tmp = *this; ++*this; return tmp; 128 } 129 piece()130 llvm::StringRef piece() const { 131 return llvm::StringRef(&(*CurPiece)[0], CurPiece->size()); 132 } 133 134 void MoveToNextPiece(); 135 }; 136 137 //===--------------------------------------------------------------------===// 138 // RopePieceBTree Class 139 //===--------------------------------------------------------------------===// 140 141 class RopePieceBTree { 142 void /*RopePieceBTreeNode*/ *Root; 143 144 public: 145 RopePieceBTree(); 146 RopePieceBTree(const RopePieceBTree &RHS); 147 RopePieceBTree &operator=(const RopePieceBTree &) = delete; 148 ~RopePieceBTree(); 149 150 using iterator = RopePieceBTreeIterator; 151 begin()152 iterator begin() const { return iterator(Root); } end()153 iterator end() const { return iterator(); } 154 unsigned size() const; empty()155 unsigned empty() const { return size() == 0; } 156 157 void clear(); 158 159 void insert(unsigned Offset, const RopePiece &R); 160 161 void erase(unsigned Offset, unsigned NumBytes); 162 }; 163 164 //===--------------------------------------------------------------------===// 165 // RewriteRope Class 166 //===--------------------------------------------------------------------===// 167 168 /// RewriteRope - A powerful string class. This class supports extremely 169 /// efficient insertions and deletions into the middle of it, even for 170 /// ridiculously long strings. 171 class RewriteRope { 172 RopePieceBTree Chunks; 173 174 /// We allocate space for string data out of a buffer of size AllocChunkSize. 175 /// This keeps track of how much space is left. 176 llvm::IntrusiveRefCntPtr<RopeRefCountString> AllocBuffer; 177 enum { AllocChunkSize = 4080 }; 178 unsigned AllocOffs = AllocChunkSize; 179 180 public: 181 RewriteRope() = default; RewriteRope(const RewriteRope & RHS)182 RewriteRope(const RewriteRope &RHS) : Chunks(RHS.Chunks) {} 183 184 // The copy assignment operator is defined as deleted pending further 185 // motivation. 186 RewriteRope &operator=(const RewriteRope &) = delete; 187 188 using iterator = RopePieceBTree::iterator; 189 using const_iterator = RopePieceBTree::iterator; 190 begin()191 iterator begin() const { return Chunks.begin(); } end()192 iterator end() const { return Chunks.end(); } size()193 unsigned size() const { return Chunks.size(); } 194 clear()195 void clear() { 196 Chunks.clear(); 197 } 198 assign(const char * Start,const char * End)199 void assign(const char *Start, const char *End) { 200 clear(); 201 if (Start != End) 202 Chunks.insert(0, MakeRopeString(Start, End)); 203 } 204 insert(unsigned Offset,const char * Start,const char * End)205 void insert(unsigned Offset, const char *Start, const char *End) { 206 assert(Offset <= size() && "Invalid position to insert!"); 207 if (Start == End) return; 208 Chunks.insert(Offset, MakeRopeString(Start, End)); 209 } 210 erase(unsigned Offset,unsigned NumBytes)211 void erase(unsigned Offset, unsigned NumBytes) { 212 assert(Offset+NumBytes <= size() && "Invalid region to erase!"); 213 if (NumBytes == 0) return; 214 Chunks.erase(Offset, NumBytes); 215 } 216 217 private: 218 RopePiece MakeRopeString(const char *Start, const char *End); 219 }; 220 221 } // namespace clang 222 223 #endif // LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H 224