clang API Documentation

FileMatchTrie.cpp
Go to the documentation of this file.
00001 //===--- FileMatchTrie.cpp - ----------------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file contains the implementation of a FileMatchTrie.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "clang/Tooling/FileMatchTrie.h"
00015 #include "llvm/ADT/StringMap.h"
00016 #include "llvm/Support/FileSystem.h"
00017 #include "llvm/Support/Path.h"
00018 #include "llvm/Support/raw_ostream.h"
00019 #include <sstream>
00020 
00021 namespace clang {
00022 namespace tooling {
00023 
00024 /// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent().
00025 struct DefaultPathComparator : public PathComparator {
00026   virtual ~DefaultPathComparator() {}
00027   bool equivalent(StringRef FileA, StringRef FileB) const override {
00028     return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
00029   }
00030 };
00031 
00032 /// \brief A node of the \c FileMatchTrie.
00033 ///
00034 /// Each node has storage for up to one path and a map mapping a path segment to
00035 /// child nodes. The trie starts with an empty root node.
00036 class FileMatchTrieNode {
00037 public:
00038   /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes
00039   /// the number of \c NewPath's trailing characters already consumed during
00040   /// recursion.
00041   ///
00042   /// An insert of a path
00043   /// 'p'starts at the root node and does the following:
00044   /// - If the node is empty, insert 'p' into its storage and abort.
00045   /// - If the node has a path 'p2' but no children, take the last path segment
00046   ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
00047   ///   'p2' there.
00048   /// - Insert a new child for the last segment of 'p' and insert the rest of
00049   ///   'p' there.
00050   ///
00051   /// An insert operation is linear in the number of a path's segments.
00052   void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
00053     // We cannot put relative paths into the FileMatchTrie as then a path can be
00054     // a postfix of another path, violating a core assumption of the trie.
00055     if (llvm::sys::path::is_relative(NewPath))
00056       return;
00057     if (Path.empty()) {
00058       // This is an empty leaf. Store NewPath and return.
00059       Path = NewPath;
00060       return;
00061     }
00062     if (Children.empty()) {
00063       // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
00064       if (NewPath == Path)
00065           return;
00066       // Make this a node and create a child-leaf with 'Path'.
00067       StringRef Element(llvm::sys::path::filename(
00068           StringRef(Path).drop_back(ConsumedLength)));
00069       Children[Element].Path = Path;
00070     }
00071     StringRef Element(llvm::sys::path::filename(
00072           StringRef(NewPath).drop_back(ConsumedLength)));
00073     Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
00074   }
00075 
00076   /// \brief Tries to find the node under this \c FileMatchTrieNode that best
00077   /// matches 'FileName'.
00078   ///
00079   /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
00080   /// \c true and an empty string is returned. If no path fits 'FileName', an
00081   /// empty string is returned. \c ConsumedLength denotes the number of
00082   /// \c Filename's trailing characters already consumed during recursion.
00083   ///
00084   /// To find the best matching node for a given path 'p', the
00085   /// \c findEquivalent() function is called recursively for each path segment
00086   /// (back to fron) of 'p' until a node 'n' is reached that does not ..
00087   /// - .. have children. In this case it is checked
00088   ///   whether the stored path is equivalent to 'p'. If yes, the best match is
00089   ///   found. Otherwise continue with the parent node as if this node did not
00090   ///   exist.
00091   /// - .. a child matching the next path segment. In this case, all children of
00092   ///   'n' are an equally good match for 'p'. All children are of 'n' are found
00093   ///   recursively and their equivalence to 'p' is determined. If none are
00094   ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
00095   ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
00096   ///   error.
00097   StringRef findEquivalent(const PathComparator& Comparator,
00098                            StringRef FileName,
00099                            bool &IsAmbiguous,
00100                            unsigned ConsumedLength = 0) const {
00101     if (Children.empty()) {
00102       if (Comparator.equivalent(StringRef(Path), FileName))
00103         return StringRef(Path);
00104       return StringRef();
00105     }
00106     StringRef Element(llvm::sys::path::filename(FileName.drop_back(
00107         ConsumedLength)));
00108     llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
00109         Children.find(Element);
00110     if (MatchingChild != Children.end()) {
00111       StringRef Result = MatchingChild->getValue().findEquivalent(
00112           Comparator, FileName, IsAmbiguous,
00113           ConsumedLength + Element.size() + 1);
00114       if (!Result.empty() || IsAmbiguous)
00115         return Result;
00116     }
00117     std::vector<StringRef> AllChildren;
00118     getAll(AllChildren, MatchingChild);
00119     StringRef Result;
00120     for (unsigned i = 0; i < AllChildren.size(); i++) {
00121       if (Comparator.equivalent(AllChildren[i], FileName)) {
00122         if (Result.empty()) {
00123           Result = AllChildren[i];
00124         } else {
00125           IsAmbiguous = true;
00126           return StringRef();
00127         }
00128       }
00129     }
00130     return Result;
00131   }
00132 
00133 private:
00134   /// \brief Gets all paths under this FileMatchTrieNode.
00135   void getAll(std::vector<StringRef> &Results,
00136               llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
00137     if (Path.empty())
00138       return;
00139     if (Children.empty()) {
00140       Results.push_back(StringRef(Path));
00141       return;
00142     }
00143     for (llvm::StringMap<FileMatchTrieNode>::const_iterator
00144          It = Children.begin(), E = Children.end();
00145          It != E; ++It) {
00146       if (It == Except)
00147         continue;
00148       It->getValue().getAll(Results, Children.end());
00149     }
00150   }
00151 
00152   // The stored absolute path in this node. Only valid for leaf nodes, i.e.
00153   // nodes where Children.empty().
00154   std::string Path;
00155 
00156   // The children of this node stored in a map based on the next path segment.
00157   llvm::StringMap<FileMatchTrieNode> Children;
00158 };
00159 
00160 FileMatchTrie::FileMatchTrie()
00161   : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
00162 
00163 FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
00164   : Root(new FileMatchTrieNode), Comparator(Comparator) {}
00165 
00166 FileMatchTrie::~FileMatchTrie() {
00167   delete Root;
00168 }
00169 
00170 void FileMatchTrie::insert(StringRef NewPath) {
00171   Root->insert(NewPath);
00172 }
00173 
00174 StringRef FileMatchTrie::findEquivalent(StringRef FileName,
00175                                         raw_ostream &Error) const {
00176   if (llvm::sys::path::is_relative(FileName)) {
00177     Error << "Cannot resolve relative paths";
00178     return StringRef();
00179   }
00180   bool IsAmbiguous = false;
00181   StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
00182   if (IsAmbiguous)
00183     Error << "Path is ambiguous";
00184   return Result;
00185 }
00186 
00187 } // end namespace tooling
00188 } // end namespace clang