clang 22.0.0git
BuildTree.cpp File Reference
#include "clang/Tooling/Syntax/BuildTree.h"
#include "clang/AST/ASTFwd.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/IgnoreExpr.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormatVariadic.h"
#include <map>

Go to the source code of this file.

Functions

static ExprIgnoreImplicitConstructorSingleStep (Expr *E)
static ExprIgnoreCXXFunctionalCastExprWrappingConstructor (Expr *E)
static ExprIgnoreImplicit (Expr *E)
static LLVM_ATTRIBUTE_UNUSED bool isImplicitExpr (Expr *E)
static CallExpr::arg_range dropDefaultArgs (CallExpr::arg_range Args)
static syntax::NodeKind getOperatorNodeKind (const CXXOperatorCallExpr &E)
static SourceLocation getQualifiedNameStart (NamedDecl *D)
 Get the start of the qualified name.
 switch (TL.getTypeLocClass())
bool TraverseNestedNameSpecifierLoc (NestedNameSpecifierLoc QualifierLoc)
syntax::IdExpression * buildIdExpression (NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKeywordLoc, SourceRange UnqualifiedIdLoc, ASTPtr From)
bool WalkUpFromMemberExpr (MemberExpr *S)
bool WalkUpFromDeclRefExpr (DeclRefExpr *S)
bool WalkUpFromDependentScopeDeclRefExpr (DependentScopeDeclRefExpr *S)
bool WalkUpFromCXXThisExpr (CXXThisExpr *S)
bool WalkUpFromParenExpr (ParenExpr *S)
bool WalkUpFromIntegerLiteral (IntegerLiteral *S)
bool WalkUpFromCharacterLiteral (CharacterLiteral *S)
bool WalkUpFromFloatingLiteral (FloatingLiteral *S)
bool WalkUpFromStringLiteral (StringLiteral *S)
bool WalkUpFromCXXBoolLiteralExpr (CXXBoolLiteralExpr *S)
bool WalkUpFromCXXNullPtrLiteralExpr (CXXNullPtrLiteralExpr *S)
bool WalkUpFromUnaryOperator (UnaryOperator *S)
bool WalkUpFromBinaryOperator (BinaryOperator *S)
syntax::CallArgumentsbuildCallArguments (CallExpr::arg_range ArgsAndDefaultArgs)
 Builds CallArguments syntax node from arguments that appear in source code, i.e. not default arguments.
bool WalkUpFromCallExpr (CallExpr *S)
bool WalkUpFromCXXConstructExpr (CXXConstructExpr *S)
bool TraverseCXXOperatorCallExpr (CXXOperatorCallExpr *S)
bool WalkUpFromCXXOperatorCallExpr (CXXOperatorCallExpr *S)
bool WalkUpFromCXXDefaultArgExpr (CXXDefaultArgExpr *S)
bool WalkUpFromNamespaceDecl (NamespaceDecl *S)
bool TraverseParenTypeLoc (ParenTypeLoc L, bool TraverseQualifier)
bool WalkUpFromParenTypeLoc (ParenTypeLoc L)
bool WalkUpFromArrayTypeLoc (ArrayTypeLoc L)
syntax::ParameterDeclarationListbuildParameterDeclarationList (ArrayRef< ParmVarDecl * > Params)
bool WalkUpFromFunctionTypeLoc (FunctionTypeLoc L)
bool WalkUpFromFunctionProtoTypeLoc (FunctionProtoTypeLoc L)
bool TraverseMemberPointerTypeLoc (MemberPointerTypeLoc L, bool TraverseQualifier)
bool WalkUpFromMemberPointerTypeLoc (MemberPointerTypeLoc L)
bool WalkUpFromDeclStmt (DeclStmt *S)
bool WalkUpFromNullStmt (NullStmt *S)
bool WalkUpFromSwitchStmt (SwitchStmt *S)
bool WalkUpFromCaseStmt (CaseStmt *S)
bool WalkUpFromDefaultStmt (DefaultStmt *S)
bool WalkUpFromIfStmt (IfStmt *S)
bool WalkUpFromForStmt (ForStmt *S)
bool WalkUpFromWhileStmt (WhileStmt *S)
bool WalkUpFromContinueStmt (ContinueStmt *S)
bool WalkUpFromBreakStmt (BreakStmt *S)
bool WalkUpFromReturnStmt (ReturnStmt *S)
bool WalkUpFromCXXForRangeStmt (CXXForRangeStmt *S)
bool WalkUpFromEmptyDecl (EmptyDecl *S)
bool WalkUpFromStaticAssertDecl (StaticAssertDecl *S)
bool WalkUpFromLinkageSpecDecl (LinkageSpecDecl *S)
bool WalkUpFromNamespaceAliasDecl (NamespaceAliasDecl *S)
bool WalkUpFromUsingDirectiveDecl (UsingDirectiveDecl *S)
bool WalkUpFromUsingDecl (UsingDecl *S)
bool WalkUpFromUnresolvedUsingValueDecl (UnresolvedUsingValueDecl *S)
bool WalkUpFromUnresolvedUsingTypenameDecl (UnresolvedUsingTypenameDecl *S)
bool WalkUpFromTypeAliasDecl (TypeAliasDecl *S)

Variables

 true
 Gets the range of the initializer inside an init-declarator C++ [dcl.decl].
default __pad0__

Function Documentation

◆ buildCallArguments()

syntax::CallArguments * buildCallArguments ( CallExpr::arg_range ArgsAndDefaultArgs)

Builds CallArguments syntax node from arguments that appear in source code, i.e. not default arguments.

Definition at line 1183 of file BuildTree.cpp.

References dropDefaultArgs(), clang::syntax::ListDelimiter, and clang::syntax::ListElement.

Referenced by WalkUpFromCallExpr(), and WalkUpFromCXXOperatorCallExpr().

◆ buildIdExpression()

◆ buildParameterDeclarationList()

syntax::ParameterDeclarationList * buildParameterDeclarationList ( ArrayRef< ParmVarDecl * > Params)

Definition at line 1348 of file BuildTree.cpp.

References clang::syntax::ListDelimiter, and clang::syntax::ListElement.

Referenced by WalkUpFromFunctionTypeLoc().

◆ dropDefaultArgs()

CallExpr::arg_range dropDefaultArgs ( CallExpr::arg_range Args)
static

Definition at line 152 of file BuildTree.cpp.

References clang::isa().

Referenced by buildCallArguments().

◆ getOperatorNodeKind()

◆ getQualifiedNameStart()

SourceLocation getQualifiedNameStart ( NamedDecl * D)
static

Get the start of the qualified name.

In the examples below it gives the location of the ^: int ^a; int *^a; int ^a::S::f(){}

Definition at line 244 of file BuildTree.cpp.

References clang::NamedDecl::getDeclName(), clang::Decl::getLocation(), clang::isa(), and clang::DeclarationName::isIdentifier().

◆ IgnoreCXXFunctionalCastExprWrappingConstructor()

Expr * IgnoreCXXFunctionalCastExprWrappingConstructor ( Expr * E)
static

Definition at line 66 of file BuildTree.cpp.

Referenced by IgnoreImplicit().

◆ IgnoreImplicit()

◆ IgnoreImplicitConstructorSingleStep()

Expr * IgnoreImplicitConstructorSingleStep ( Expr * E)
static

Definition at line 47 of file BuildTree.cpp.

References clang::C, and clang::isa().

Referenced by IgnoreImplicit(), and clang::Expr::IgnoreUnlessSpelledInSource().

◆ isImplicitExpr()

LLVM_ATTRIBUTE_UNUSED bool isImplicitExpr ( Expr * E)
static

Definition at line 81 of file BuildTree.cpp.

References IgnoreImplicit().

◆ switch()

◆ TraverseCXXOperatorCallExpr()

◆ TraverseMemberPointerTypeLoc()

◆ TraverseNestedNameSpecifierLoc()

◆ TraverseParenTypeLoc()

bool TraverseParenTypeLoc ( ParenTypeLoc L,
bool TraverseQualifier )

◆ WalkUpFromArrayTypeLoc()

◆ WalkUpFromBinaryOperator()

◆ WalkUpFromBreakStmt()

bool WalkUpFromBreakStmt ( BreakStmt * S)

◆ WalkUpFromCallExpr()

◆ WalkUpFromCaseStmt()

◆ WalkUpFromCharacterLiteral()

bool WalkUpFromCharacterLiteral ( CharacterLiteral * S)

◆ WalkUpFromContinueStmt()

bool WalkUpFromContinueStmt ( ContinueStmt * S)

◆ WalkUpFromCXXBoolLiteralExpr()

bool WalkUpFromCXXBoolLiteralExpr ( CXXBoolLiteralExpr * S)

◆ WalkUpFromCXXConstructExpr()

◆ WalkUpFromCXXDefaultArgExpr()

bool WalkUpFromCXXDefaultArgExpr ( CXXDefaultArgExpr * S)

Definition at line 1306 of file BuildTree.cpp.

◆ WalkUpFromCXXForRangeStmt()

◆ WalkUpFromCXXNullPtrLiteralExpr()

bool WalkUpFromCXXNullPtrLiteralExpr ( CXXNullPtrLiteralExpr * S)

◆ WalkUpFromCXXOperatorCallExpr()

◆ WalkUpFromCXXThisExpr()

◆ WalkUpFromDeclRefExpr()

◆ WalkUpFromDeclStmt()

bool WalkUpFromDeclStmt ( DeclStmt * S)

Definition at line 1406 of file BuildTree.cpp.

◆ WalkUpFromDefaultStmt()

◆ WalkUpFromDependentScopeDeclRefExpr()

◆ WalkUpFromEmptyDecl()

bool WalkUpFromEmptyDecl ( EmptyDecl * S)

Definition at line 1508 of file BuildTree.cpp.

◆ WalkUpFromFloatingLiteral()

bool WalkUpFromFloatingLiteral ( FloatingLiteral * S)

◆ WalkUpFromForStmt()

◆ WalkUpFromFunctionProtoTypeLoc()

◆ WalkUpFromFunctionTypeLoc()

◆ WalkUpFromIfStmt()

◆ WalkUpFromIntegerLiteral()

bool WalkUpFromIntegerLiteral ( IntegerLiteral * S)

◆ WalkUpFromLinkageSpecDecl()

bool WalkUpFromLinkageSpecDecl ( LinkageSpecDecl * S)

Definition at line 1522 of file BuildTree.cpp.

◆ WalkUpFromMemberExpr()

◆ WalkUpFromMemberPointerTypeLoc()

bool WalkUpFromMemberPointerTypeLoc ( MemberPointerTypeLoc L)

◆ WalkUpFromNamespaceAliasDecl()

bool WalkUpFromNamespaceAliasDecl ( NamespaceAliasDecl * S)

Definition at line 1529 of file BuildTree.cpp.

◆ WalkUpFromNamespaceDecl()

bool WalkUpFromNamespaceDecl ( NamespaceDecl * S)

Definition at line 1308 of file BuildTree.cpp.

◆ WalkUpFromNullStmt()

bool WalkUpFromNullStmt ( NullStmt * S)

Definition at line 1412 of file BuildTree.cpp.

◆ WalkUpFromParenExpr()

◆ WalkUpFromParenTypeLoc()

◆ WalkUpFromReturnStmt()

◆ WalkUpFromStaticAssertDecl()

◆ WalkUpFromStringLiteral()

bool WalkUpFromStringLiteral ( StringLiteral * S)

◆ WalkUpFromSwitchStmt()

◆ WalkUpFromTypeAliasDecl()

bool WalkUpFromTypeAliasDecl ( TypeAliasDecl * S)

Definition at line 1559 of file BuildTree.cpp.

◆ WalkUpFromUnaryOperator()

◆ WalkUpFromUnresolvedUsingTypenameDecl()

bool WalkUpFromUnresolvedUsingTypenameDecl ( UnresolvedUsingTypenameDecl * S)

Definition at line 1553 of file BuildTree.cpp.

◆ WalkUpFromUnresolvedUsingValueDecl()

bool WalkUpFromUnresolvedUsingValueDecl ( UnresolvedUsingValueDecl * S)

Definition at line 1547 of file BuildTree.cpp.

◆ WalkUpFromUsingDecl()

bool WalkUpFromUsingDecl ( UsingDecl * S)

Definition at line 1541 of file BuildTree.cpp.

◆ WalkUpFromUsingDirectiveDecl()

bool WalkUpFromUsingDirectiveDecl ( UsingDirectiveDecl * S)

Definition at line 1535 of file BuildTree.cpp.

◆ WalkUpFromWhileStmt()

Variable Documentation

◆ __pad0__

default __pad0__

Definition at line 995 of file BuildTree.cpp.

◆ true

default true

Gets the range of the initializer inside an init-declarator C++ [dcl.decl].

int a; -> range of `, / int *a = nullptr -> range of = nullptr. / int a{} -> range of {}. / int a() -> range of ()`. static SourceRange getInitializerRange(Decl *D) { if (auto *V = dyn_cast<VarDecl>(D)) { auto *I = V->getInit(); Initializers in range-based-for are not part of the declarator if (I && !V->isCXXForRangeDecl()) return I->getSourceRange(); }

return SourceRange(); }

/ Gets the range of declarator as defined by the C++ grammar. E.g. / int a; -> range of a, / int *a; -> range of *a, / int a[10]; -> range of a[10], / int a[1][2][3]; -> range of a[1][2][3], / int *a = nullptr -> range of *a = nullptr. / int S::f(){} -> range of S::f(). / FIXME: Name must be a source range. static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, SourceLocation Name, SourceRange Initializer) { SourceLocation Start = GetStartLoc().Visit(T); SourceLocation End = T.getEndLoc(); if (Name.isValid()) { if (Start.isInvalid()) Start = Name; End of TypeLoc could be invalid if the type is invalid, fallback to the NameLoc. if (End.isInvalid() || SM.isBeforeInTranslationUnit(End, Name)) End = Name; } if (Initializer.isValid()) { auto InitializerEnd = Initializer.getEnd(); assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd); End = InitializerEnd; } return SourceRange(Start, End); }

namespace { / All AST hierarchy roots that can be represented as pointers. using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; / Maintains a mapping from AST to syntax tree nodes. This class will get more / complicated as we support more kinds of AST nodes, e.g. TypeLocs. / FIXME: expose this as public API. class ASTToSyntaxMapping { public: void add(ASTPtr From, syntax::Tree *To) { assert(To != nullptr); assert(!From.isNull());

bool Added = Nodes.insert({From, To}).second; (void)Added; assert(Added && "mapping added twice"); }

void add(NestedNameSpecifierLoc From, syntax::Tree *To) { assert(To != nullptr); assert(From.hasQualifier());

bool Added = NNSNodes.insert({From, To}).second; (void)Added; assert(Added && "mapping added twice"); }

syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); }

syntax::Tree *find(NestedNameSpecifierLoc P) const { return NNSNodes.lookup(P); }

private: llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; }; } // namespace

/ A helper class for constructing the syntax tree while traversing a clang / AST. / / At each point of the traversal we maintain a list of pending nodes. / Initially all tokens are added as pending nodes. When processing a clang AST / node, the clients need to: / - create a corresponding syntax node, / - assign roles to all pending child nodes with 'markChild' and / 'markChildToken', / - replace the child nodes with the new syntax node in the pending list / with 'foldNode'. / / Note that all children are expected to be processed when building a node. / / Call finalize() to finish building the tree and consume the root node. class syntax::TreeBuilder { public: TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM) : Arena(Arena), TBTM(TBTM), Pending(Arena, TBTM.tokenBuffer()) { for (const auto &T : TBTM.tokenBuffer().expandedTokens()) LocationToToken.insert({T.location(), &T}); }

llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } const SourceManager &sourceManager() const { return TBTM.sourceManager(); }

/ Populate children for New node, assuming it covers tokens from / Range. void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { assert(New); Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); }

void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { FIXME: add mapping for TypeLocs foldNode(Range, New, nullptr); }

void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, NestedNameSpecifierLoc From) { assert(New); Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); }

/ Populate children for New list, assuming it covers tokens from a / subrange of SuperRange. void foldList(ArrayRef<syntax::Token> SuperRange, syntax::List *New, ASTPtr From) { assert(New); auto ListRange = Pending.shrinkToFitList(SuperRange); Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New); if (From) Mapping.add(From, New); }

/ Notifies that we should not consume trailing semicolon when computing / token range of D. void noticeDeclWithoutSemicolon(Decl *D);

/ Mark the Child node with a corresponding Role. All marked children / should be consumed by foldNode. / When called on expressions (clang::Expr is derived from clang::Stmt), / wraps expressions into expression statement. void markStmtChild(Stmt *Child, NodeRole Role); / Should be called for expressions in non-statement position to avoid / wrapping into expression statement. void markExprChild(Expr *Child, NodeRole Role); / Set role for a token starting at Loc. void markChildToken(SourceLocation Loc, NodeRole R); / Set role for T. void markChildToken(const syntax::Token *T, NodeRole R);

/ Set role for N. void markChild(syntax::Node *N, NodeRole R); / Set role for the syntax node matching N. void markChild(ASTPtr N, NodeRole R); / Set role for the syntax node matching N. void markChild(NestedNameSpecifierLoc N, NodeRole R);

/ Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { auto Tokens = TBTM.tokenBuffer().expandedTokens(); assert(!Tokens.empty()); assert(Tokens.back().kind() == tok::eof);

Build the root of the tree, consuming all the children. Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(), new (Arena.getAllocator()) syntax::TranslationUnit);

auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); TU->assertInvariantsRecursive(); return TU; }

/ Finds a token starting at L. The token must exist if L is valid. const syntax::Token *findToken(SourceLocation L) const;

/ Finds the syntax tokens corresponding to the SourceRange. ArrayRef<syntax::Token> getRange(SourceRange Range) const { assert(Range.isValid()); return getRange(Range.getBegin(), Range.getEnd()); }

/ Finds the syntax tokens corresponding to the passed source locations. / First is the start position of the first token and Last is the start / position of the last token. ArrayRef<syntax::Token> getRange(SourceLocation First, SourceLocation Last) const { assert(First.isValid()); assert(Last.isValid()); assert(First == Last || TBTM.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::ArrayRef(findToken(First), std::next(findToken(Last))); }

ArrayRef<syntax::Token> getTemplateRange(const ClassTemplateSpecializationDecl *D) const { auto Tokens = getRange(D->getSourceRange()); return maybeAppendSemicolon(Tokens, D); }

/ Returns true if D is the last declarator in a chain and is thus / reponsible for creating SimpleDeclaration for the whole chain. bool isResponsibleForCreatingDeclaration(const Decl *D) const { assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && "only DeclaratorDecl and TypedefNameDecl are supported.");

const Decl *Next = D->getNextDeclInContext();

There's no next sibling, this one is responsible. if (Next == nullptr) { return true; }

Next sibling is not the same type, this one is responsible. if (D->getKind() != Next->getKind()) { return true; } Next sibling doesn't begin at the same loc, it must be a different declaration, so this declarator is responsible. if (Next->getBeginLoc() != D->getBeginLoc()) { return true; }

NextT is a member of the same declaration, and we need the last member to create declaration. This one is not responsible. return false; }

ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { ArrayRef<syntax::Token> Tokens; We want to drop the template parameters for specializations. if (const auto *S = dyn_cast<TagDecl>(D)) Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); else Tokens = getRange(D->getSourceRange()); return maybeAppendSemicolon(Tokens, D); }

ArrayRef<syntax::Token> getExprRange(const Expr *E) const { return getRange(E->getSourceRange()); }

/ Find the adjusted range for the statement, consuming the trailing / semicolon when needed. ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { auto Tokens = getRange(S->getSourceRange()); if (isa<CompoundStmt>(S)) return Tokens;

Some statements miss a trailing semicolon, e.g. 'return', 'continue' and all statements that end with those. Consume this semicolon here. if (Tokens.back().kind() == tok::semi) return Tokens; return withTrailingSemicolon(Tokens); }

private: ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, const Decl *D) const { if (isa<NamespaceDecl>(D)) return Tokens; if (DeclsWithoutSemicolons.count(D)) return Tokens; FIXME: do not consume trailing semicolon on function definitions. Most declarations own a semicolon in syntax trees, but not in clang AST. return withTrailingSemicolon(Tokens); }

ArrayRef<syntax::Token> withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { assert(!Tokens.empty()); assert(Tokens.back().kind() != tok::eof); We never consume 'eof', so looking at the next token is ok. if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) return llvm::ArrayRef(Tokens.begin(), Tokens.end() + 1); return Tokens; }

void setRole(syntax::Node *N, NodeRole R) { assert(N->getRole() == NodeRole::Detached); N->setRole(R); }

/ A collection of trees covering the input tokens. / When created, each tree corresponds to a single token in the file. / Clients call 'foldChildren' to attach one or more subtrees to a parent / node and update the list of trees accordingly. / / Ensures that added nodes properly nest and cover the whole token stream. struct Forest { Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) { assert(!TB.expandedTokens().empty()); assert(TB.expandedTokens().back().kind() == tok::eof); Create all leaf nodes. Note that we do not have 'eof' in the tree. for (const auto &T : TB.expandedTokens().drop_back()) { auto *L = new (A.getAllocator()) syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T)); L->Original = true; L->CanModify = TB.spelledForExpanded(T).has_value(); Trees.insert(Trees.end(), {&T, L}); } }

void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { assert(!Range.empty()); auto It = Trees.lower_bound(Range.begin()); assert(It != Trees.end() && "no node found"); assert(It->first == Range.begin() && "no child with the specified range"); assert((std::next(It) == Trees.end() || std::next(It)->first == Range.end()) && "no child with the specified range"); assert(It->second->getRole() == NodeRole::Detached && "re-assigning role for a child"); It->second->setRole(Role); }

/ Shrink Range to a subrange that only contains tokens of a list. / List elements and delimiters should already have correct roles. ArrayRef<syntax::Token> shrinkToFitList(ArrayRef<syntax::Token> Range) { auto BeginChildren = Trees.lower_bound(Range.begin()); assert((BeginChildren == Trees.end() || BeginChildren->first == Range.begin()) && "Range crosses boundaries of existing subtrees");

auto EndChildren = Trees.lower_bound(Range.end()); assert( (EndChildren == Trees.end() || EndChildren->first == Range.end()) && "Range crosses boundaries of existing subtrees");

auto BelongsToList = [](decltype(Trees)::value_type KV) { auto Role = KV.second->getRole(); return Role == syntax::NodeRole::ListElement || Role == syntax::NodeRole::ListDelimiter; };

auto BeginListChildren = std::find_if(BeginChildren, EndChildren, BelongsToList);

auto EndListChildren = std::find_if_not(BeginListChildren, EndChildren, BelongsToList);

return ArrayRef<syntax::Token>(BeginListChildren->first, EndListChildren->first); }

/ Add Node to the forest and attach child nodes based on Tokens. void foldChildren(const syntax::TokenBuffer &TB, ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { Attach children to Node. assert(Node->getFirstChild() == nullptr && "node already has children");

auto *FirstToken = Tokens.begin(); auto BeginChildren = Trees.lower_bound(FirstToken);

assert((BeginChildren == Trees.end() || BeginChildren->first == FirstToken) && "fold crosses boundaries of existing subtrees"); auto EndChildren = Trees.lower_bound(Tokens.end()); assert( (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && "fold crosses boundaries of existing subtrees");

for (auto It = BeginChildren; It != EndChildren; ++It) { auto *C = It->second; if (C->getRole() == NodeRole::Detached) C->setRole(NodeRole::Unknown); Node->appendChildLowLevel(C); }

Mark that this node came from the AST and is backed by the source code. Node->Original = true; Node->CanModify = TB.spelledForExpanded(Tokens).has_value();

Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, Node}); }

EXPECTS: all tokens were consumed and are owned by a single root node. syntax::Node *finalize() && { assert(Trees.size() == 1); auto *Root = Trees.begin()->second; Trees = {}; return Root; }

std::string str(const syntax::TokenBufferTokenManager &STM) const { std::string R; for (auto It = Trees.begin(); It != Trees.end(); ++It) { unsigned CoveredTokens = It != Trees.end() ? (std::next(It)->first - It->first) : STM.tokenBuffer().expandedTokens().end() - It->first;

R += std::string( formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), It->first->text(STM.sourceManager()), CoveredTokens)); R += It->second->dump(STM); } return R; }

private: / Maps from the start token to a subtree starting at that token. / Keys in the map are pointers into the array of expanded tokens, so / pointer order corresponds to the order of preprocessor tokens. std::map<const syntax::Token *, syntax::Node *> Trees; };

/ For debugging purposes. std::string str() { return Pending.str(TBTM); }

syntax::Arena &Arena; TokenBufferTokenManager& TBTM; / To quickly find tokens by their start location. llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken; Forest Pending; llvm::DenseSet<Decl *> DeclsWithoutSemicolons; ASTToSyntaxMapping Mapping; };

namespace { class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { public: explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) : Builder(Builder), Context(Context) {}

bool shouldTraversePostOrder() const { return true; }

bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { return processDeclaratorAndDeclaration(DD); }

bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { return processDeclaratorAndDeclaration(TD); }

bool VisitDecl(Decl *D) { assert(!D->isImplicit()); Builder.foldNode(Builder.getDeclarationRange(D), new (allocator()) syntax::UnknownDeclaration(), D); return true; }

RAV does not call WalkUpFrom* on explicit instantiations, so we have to override Traverse. FIXME: make RAV call WalkUpFrom* instead. bool TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) return false; if (C->isExplicitSpecialization()) return true; // we are only interested in explicit instantiations. auto *Declaration = cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); foldExplicitTemplateInstantiation( Builder.getTemplateRange(C), Builder.findToken(C->getExternKeywordLoc()), Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); return true; }

bool WalkUpFromTemplateDecl(TemplateDecl *S) { foldTemplateDeclaration( Builder.getDeclarationRange(S), Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), Builder.getDeclarationRange(S->getTemplatedDecl()), S); return true; }

bool WalkUpFromTagDecl(TagDecl *C) { FIXME: build the ClassSpecifier node. if (!C->isFreeStanding()) { assert(C->getNumTemplateParameterLists() == 0); return true; } handleFreeStandingTagDecl(C); return true; }

syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { assert(C->isFreeStanding()); Class is a declaration specifier and needs a spanning declaration node. auto DeclarationRange = Builder.getDeclarationRange(C); syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; Builder.foldNode(DeclarationRange, Result, nullptr);

Build TemplateDeclaration nodes if we had template parameters. auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); auto R = llvm::ArrayRef(TemplateKW, DeclarationRange.end()); Result = foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); DeclarationRange = R; }; if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) ConsumeTemplateParameters(*S->getTemplateParameters()); for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; –I) ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); return Result; }

bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { We do not want to call VisitDecl(), the declaration for translation unit is built by finalize(). return true; }

bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole;

Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); for (auto *Child : S->body()) Builder.markStmtChild(Child, NodeRole::Statement); Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);

Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::CompoundStatement, S); return true; }

Some statements are not yet handled by syntax trees. bool WalkUpFromStmt(Stmt *S) { Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::UnknownStatement, S); return true; }

bool TraverseIfStmt(IfStmt *S) { bool Result = [&, this]() { if (S->getInit() && !TraverseStmt(S->getInit())) { return false; } In cases where the condition is an initialized declaration in a statement, we want to preserve the declaration and ignore the implicit condition expression in the syntax tree. if (S->hasVarStorage()) { if (!TraverseStmt(S->getConditionVariableDeclStmt())) return false; } else if (S->getCond() && !TraverseStmt(S->getCond())) return false;

if (S->getThen() && !TraverseStmt(S->getThen())) return false; if (S->getElse() && !TraverseStmt(S->getElse())) return false; return true; }(); WalkUpFromIfStmt(S); return Result; }

bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { We override to traverse range initializer as VarDecl. RAV traverses it as a statement, we produce invalid node kinds in that case. FIXME: should do this in RAV instead? bool Result = [&, this]() { if (S->getInit() && !TraverseStmt(S->getInit())) return false; if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) return false; if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) return false; if (S->getBody() && !TraverseStmt(S->getBody())) return false; return true; }(); WalkUpFromCXXForRangeStmt(S); return Result; }

bool TraverseStmt(Stmt *S) { if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { We want to consume the semicolon, make sure SimpleDeclaration does not. for (auto *D : DS->decls()) Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = dyn_cast_or_null<Expr>(S)) { return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E)); } return RecursiveASTVisitor::TraverseStmt(S); }

bool TraverseOpaqueValueExpr(OpaqueValueExpr *VE) { OpaqueValue doesn't correspond to concrete syntax, ignore it. return true; }

Some expressions are not yet handled by syntax trees. bool WalkUpFromExpr(Expr *E) { assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); Builder.foldNode(Builder.getExprRange(E), new (allocator()) syntax::UnknownExpression, E); return true; }

bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { The semantic AST node UserDefinedLiteral (UDL) may have one child node referencing the location of the UDL suffix (_w in 1.2_w). The UDL suffix location does not point to the beginning of a token, so we can't represent the UDL suffix as a separate syntax tree node.

return WalkUpFromUserDefinedLiteral(S);

}

syntax::UserDefinedLiteralExpression * buildUserDefinedLiteral(UserDefinedLiteral *S) { switch (S->getLiteralOperatorKind()) { case UserDefinedLiteral::LOK_Integer: return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; case UserDefinedLiteral::LOK_Floating: return new (allocator()) syntax::FloatUserDefinedLiteralExpression; case UserDefinedLiteral::LOK_Character: return new (allocator()) syntax::CharUserDefinedLiteralExpression; case UserDefinedLiteral::LOK_String: return new (allocator()) syntax::StringUserDefinedLiteralExpression; case UserDefinedLiteral::LOK_Raw: case UserDefinedLiteral::LOK_Template: For raw literal operator and numeric literal operator template we cannot get the type of the operand in the semantic AST. We get this information from the token. As integer and floating point have the same token kind, we run NumericLiteralParser again to distinguish them. auto TokLoc = S->getBeginLoc(); auto TokSpelling = Builder.findToken(TokLoc)->text(Context.getSourceManager()); auto Literal = NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), Context.getLangOpts(), Context.getTargetInfo(), Context.getDiagnostics()); if (Literal.isIntegerLiteral()) return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; else { assert(Literal.isFloatingLiteral()); return new (allocator()) syntax::FloatUserDefinedLiteralExpression; } } llvm_unreachable("Unknown literal operator kind."); }

bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); return true; }

syntax::NameSpecifier *buildIdentifier(SourceRange SR, bool DropBack = false) { auto NameSpecifierTokens = Builder.getRange(SR).drop_back(DropBack); assert(NameSpecifierTokens.size() == 1); Builder.markChildToken(NameSpecifierTokens.begin(), syntax::NodeRole::Unknown); auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; Builder.foldNode(NameSpecifierTokens, NS, nullptr); return NS; }

syntax::NameSpecifier *buildSimpleTemplateName(SourceRange SR) { auto NameSpecifierTokens = Builder.getRange(SR); TODO: Build SimpleTemplateNameSpecifier children and implement accessors to them. Be aware, we cannot do that simply by calling TraverseTypeLoc, some TypeLocs have inside them the previous name specifier and we want to treat them independently. auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; Builder.foldNode(NameSpecifierTokens, NS, nullptr); return NS; }

syntax::NameSpecifier * buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { assert(NNSLoc.hasQualifier()); switch (NNSLoc.getNestedNameSpecifier().getKind()) { case NestedNameSpecifier::Kind::Global: return new (allocator()) syntax::GlobalNameSpecifier;

case NestedNameSpecifier::Kind::Namespace: return buildIdentifier(NNSLoc.getLocalSourceRange(), /*DropBack=

Definition at line 952 of file BuildTree.cpp.