clang API Documentation

FileManager.cpp
Go to the documentation of this file.
00001 //===--- FileManager.cpp - File System Probing and Caching ----------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file implements the FileManager interface.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 //
00014 // TODO: This should index all interesting directories with dirent calls.
00015 //  getdirentries ?
00016 //  opendir/readdir_r/closedir ?
00017 //
00018 //===----------------------------------------------------------------------===//
00019 
00020 #include "clang/Basic/FileManager.h"
00021 #include "clang/Basic/FileSystemStatCache.h"
00022 #include "llvm/ADT/SmallString.h"
00023 #include "llvm/Config/llvm-config.h"
00024 #include "llvm/Support/FileSystem.h"
00025 #include "llvm/Support/MemoryBuffer.h"
00026 #include "llvm/Support/Path.h"
00027 #include "llvm/Support/raw_ostream.h"
00028 #include <map>
00029 #include <set>
00030 #include <string>
00031 #include <system_error>
00032 
00033 using namespace clang;
00034 
/// NON_EXISTENT_DIR - Sentinel DirectoryEntry pointer (all bits of -1) that is
/// neither null nor a real entry.  It marks a directory name that has been
/// looked up and found not to exist on disk.
#define NON_EXISTENT_DIR \
  reinterpret_cast<DirectoryEntry*>(static_cast<intptr_t>(-1))
00038 
/// NON_EXISTENT_FILE - Sentinel FileEntry pointer (all bits of -1) that is
/// neither null nor a real entry.  It marks a filename that has been looked
/// up and found not to exist on disk.
#define NON_EXISTENT_FILE \
  reinterpret_cast<FileEntry*>(static_cast<intptr_t>(-1))
00042 
00043 //===----------------------------------------------------------------------===//
00044 // Common logic.
00045 //===----------------------------------------------------------------------===//
00046 
00047 FileManager::FileManager(const FileSystemOptions &FSO,
00048                          IntrusiveRefCntPtr<vfs::FileSystem> FS)
00049   : FS(FS), FileSystemOpts(FSO),
00050     SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) {
00051   NumDirLookups = NumFileLookups = 0;
00052   NumDirCacheMisses = NumFileCacheMisses = 0;
00053 
00054   // If the caller doesn't provide a virtual file system, just grab the real
00055   // file system.
00056   if (!FS)
00057     this->FS = vfs::getRealFileSystem();
00058 }
00059 
00060 FileManager::~FileManager() {
00061   for (unsigned i = 0, e = VirtualFileEntries.size(); i != e; ++i)
00062     delete VirtualFileEntries[i];
00063   for (unsigned i = 0, e = VirtualDirectoryEntries.size(); i != e; ++i)
00064     delete VirtualDirectoryEntries[i];
00065 }
00066 
00067 void FileManager::addStatCache(std::unique_ptr<FileSystemStatCache> statCache,
00068                                bool AtBeginning) {
00069   assert(statCache && "No stat cache provided?");
00070   if (AtBeginning || !StatCache.get()) {
00071     statCache->setNextStatCache(std::move(StatCache));
00072     StatCache = std::move(statCache);
00073     return;
00074   }
00075   
00076   FileSystemStatCache *LastCache = StatCache.get();
00077   while (LastCache->getNextStatCache())
00078     LastCache = LastCache->getNextStatCache();
00079 
00080   LastCache->setNextStatCache(std::move(statCache));
00081 }
00082 
00083 void FileManager::removeStatCache(FileSystemStatCache *statCache) {
00084   if (!statCache)
00085     return;
00086   
00087   if (StatCache.get() == statCache) {
00088     // This is the first stat cache.
00089     StatCache = StatCache->takeNextStatCache();
00090     return;
00091   }
00092   
00093   // Find the stat cache in the list.
00094   FileSystemStatCache *PrevCache = StatCache.get();
00095   while (PrevCache && PrevCache->getNextStatCache() != statCache)
00096     PrevCache = PrevCache->getNextStatCache();
00097   
00098   assert(PrevCache && "Stat cache not found for removal");
00099   PrevCache->setNextStatCache(statCache->takeNextStatCache());
00100 }
00101 
00102 void FileManager::clearStatCaches() {
00103   StatCache.reset();
00104 }
00105 
00106 /// \brief Retrieve the directory that the given file name resides in.
00107 /// Filename can point to either a real file or a virtual file.
00108 static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
00109                                                   StringRef Filename,
00110                                                   bool CacheFailure) {
00111   if (Filename.empty())
00112     return nullptr;
00113 
00114   if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
00115     return nullptr; // If Filename is a directory.
00116 
00117   StringRef DirName = llvm::sys::path::parent_path(Filename);
00118   // Use the current directory if file has no path component.
00119   if (DirName.empty())
00120     DirName = ".";
00121 
00122   return FileMgr.getDirectory(DirName, CacheFailure);
00123 }
00124 
00125 /// Add all ancestors of the given path (pointing to either a file or
00126 /// a directory) as virtual directories.
00127 void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
00128   StringRef DirName = llvm::sys::path::parent_path(Path);
00129   if (DirName.empty())
00130     return;
00131 
00132   llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
00133     SeenDirEntries.GetOrCreateValue(DirName);
00134 
00135   // When caching a virtual directory, we always cache its ancestors
00136   // at the same time.  Therefore, if DirName is already in the cache,
00137   // we don't need to recurse as its ancestors must also already be in
00138   // the cache.
00139   if (NamedDirEnt.getValue())
00140     return;
00141 
00142   // Add the virtual directory to the cache.
00143   DirectoryEntry *UDE = new DirectoryEntry;
00144   UDE->Name = NamedDirEnt.getKeyData();
00145   NamedDirEnt.setValue(UDE);
00146   VirtualDirectoryEntries.push_back(UDE);
00147 
00148   // Recursively add the other ancestors.
00149   addAncestorsAsVirtualDirs(DirName);
00150 }
00151 
00152 const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
00153                                                 bool CacheFailure) {
00154   // stat doesn't like trailing separators except for root directory.
00155   // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
00156   // (though it can strip '\\')
00157   if (DirName.size() > 1 &&
00158       DirName != llvm::sys::path::root_path(DirName) &&
00159       llvm::sys::path::is_separator(DirName.back()))
00160     DirName = DirName.substr(0, DirName.size()-1);
00161 #ifdef LLVM_ON_WIN32
00162   // Fixing a problem with "clang C:test.c" on Windows.
00163   // Stat("C:") does not recognize "C:" as a valid directory
00164   std::string DirNameStr;
00165   if (DirName.size() > 1 && DirName.back() == ':' &&
00166       DirName.equals_lower(llvm::sys::path::root_name(DirName))) {
00167     DirNameStr = DirName.str() + '.';
00168     DirName = DirNameStr;
00169   }
00170 #endif
00171 
00172   ++NumDirLookups;
00173   llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
00174     SeenDirEntries.GetOrCreateValue(DirName);
00175 
00176   // See if there was already an entry in the map.  Note that the map
00177   // contains both virtual and real directories.
00178   if (NamedDirEnt.getValue())
00179     return NamedDirEnt.getValue() == NON_EXISTENT_DIR ? nullptr
00180                                                       : NamedDirEnt.getValue();
00181 
00182   ++NumDirCacheMisses;
00183 
00184   // By default, initialize it to invalid.
00185   NamedDirEnt.setValue(NON_EXISTENT_DIR);
00186 
00187   // Get the null-terminated directory name as stored as the key of the
00188   // SeenDirEntries map.
00189   const char *InterndDirName = NamedDirEnt.getKeyData();
00190 
00191   // Check to see if the directory exists.
00192   FileData Data;
00193   if (getStatValue(InterndDirName, Data, false, nullptr /*directory lookup*/)) {
00194     // There's no real directory at the given path.
00195     if (!CacheFailure)
00196       SeenDirEntries.erase(DirName);
00197     return nullptr;
00198   }
00199 
00200   // It exists.  See if we have already opened a directory with the
00201   // same inode (this occurs on Unix-like systems when one dir is
00202   // symlinked to another, for example) or the same path (on
00203   // Windows).
00204   DirectoryEntry &UDE = UniqueRealDirs[Data.UniqueID];
00205 
00206   NamedDirEnt.setValue(&UDE);
00207   if (!UDE.getName()) {
00208     // We don't have this directory yet, add it.  We use the string
00209     // key from the SeenDirEntries map as the string.
00210     UDE.Name  = InterndDirName;
00211   }
00212 
00213   return &UDE;
00214 }
00215 
00216 const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
00217                                       bool CacheFailure) {
00218   ++NumFileLookups;
00219 
00220   // See if there is already an entry in the map.
00221   llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
00222     SeenFileEntries.GetOrCreateValue(Filename);
00223 
00224   // See if there is already an entry in the map.
00225   if (NamedFileEnt.getValue())
00226     return NamedFileEnt.getValue() == NON_EXISTENT_FILE
00227                  ? nullptr : NamedFileEnt.getValue();
00228 
00229   ++NumFileCacheMisses;
00230 
00231   // By default, initialize it to invalid.
00232   NamedFileEnt.setValue(NON_EXISTENT_FILE);
00233 
00234   // Get the null-terminated file name as stored as the key of the
00235   // SeenFileEntries map.
00236   const char *InterndFileName = NamedFileEnt.getKeyData();
00237 
00238   // Look up the directory for the file.  When looking up something like
00239   // sys/foo.h we'll discover all of the search directories that have a 'sys'
00240   // subdirectory.  This will let us avoid having to waste time on known-to-fail
00241   // searches when we go to find sys/bar.h, because all the search directories
00242   // without a 'sys' subdir will get a cached failure result.
00243   const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
00244                                                        CacheFailure);
00245   if (DirInfo == nullptr) { // Directory doesn't exist, file can't exist.
00246     if (!CacheFailure)
00247       SeenFileEntries.erase(Filename);
00248 
00249     return nullptr;
00250   }
00251   
00252   // FIXME: Use the directory info to prune this, before doing the stat syscall.
00253   // FIXME: This will reduce the # syscalls.
00254 
00255   // Nope, there isn't.  Check to see if the file exists.
00256   std::unique_ptr<vfs::File> F;
00257   FileData Data;
00258   if (getStatValue(InterndFileName, Data, true, openFile ? &F : nullptr)) {
00259     // There's no real file at the given path.
00260     if (!CacheFailure)
00261       SeenFileEntries.erase(Filename);
00262 
00263     return nullptr;
00264   }
00265 
00266   assert((openFile || !F) && "undesired open file");
00267 
00268   // It exists.  See if we have already opened a file with the same inode.
00269   // This occurs when one dir is symlinked to another, for example.
00270   FileEntry &UFE = UniqueRealFiles[Data.UniqueID];
00271 
00272   NamedFileEnt.setValue(&UFE);
00273 
00274   // If the name returned by getStatValue is different than Filename, re-intern
00275   // the name.
00276   if (Data.Name != Filename) {
00277     auto &NamedFileEnt = SeenFileEntries.GetOrCreateValue(Data.Name);
00278     if (!NamedFileEnt.getValue())
00279       NamedFileEnt.setValue(&UFE);
00280     else
00281       assert(NamedFileEnt.getValue() == &UFE &&
00282              "filename from getStatValue() refers to wrong file");
00283     InterndFileName = NamedFileEnt.getKeyData();
00284   }
00285 
00286   if (UFE.isValid()) { // Already have an entry with this inode, return it.
00287 
00288     // FIXME: this hack ensures that if we look up a file by a virtual path in
00289     // the VFS that the getDir() will have the virtual path, even if we found
00290     // the file by a 'real' path first. This is required in order to find a
00291     // module's structure when its headers/module map are mapped in the VFS.
00292     // We should remove this as soon as we can properly support a file having
00293     // multiple names.
00294     if (DirInfo != UFE.Dir && Data.IsVFSMapped)
00295       UFE.Dir = DirInfo;
00296 
00297     // Always update the name to use the last name by which a file was accessed.
00298     // FIXME: Neither this nor always using the first name is correct; we want
00299     // to switch towards a design where we return a FileName object that
00300     // encapsulates both the name by which the file was accessed and the
00301     // corresponding FileEntry.
00302     UFE.Name = InterndFileName;
00303 
00304     return &UFE;
00305   }
00306 
00307   // Otherwise, we don't have this file yet, add it.
00308   UFE.Name    = InterndFileName;
00309   UFE.Size = Data.Size;
00310   UFE.ModTime = Data.ModTime;
00311   UFE.Dir     = DirInfo;
00312   UFE.UID     = NextFileUID++;
00313   UFE.UniqueID = Data.UniqueID;
00314   UFE.IsNamedPipe = Data.IsNamedPipe;
00315   UFE.InPCH = Data.InPCH;
00316   UFE.File = std::move(F);
00317   UFE.IsValid = true;
00318   return &UFE;
00319 }
00320 
00321 const FileEntry *
00322 FileManager::getVirtualFile(StringRef Filename, off_t Size,
00323                             time_t ModificationTime) {
00324   ++NumFileLookups;
00325 
00326   // See if there is already an entry in the map.
00327   llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
00328     SeenFileEntries.GetOrCreateValue(Filename);
00329 
00330   // See if there is already an entry in the map.
00331   if (NamedFileEnt.getValue() && NamedFileEnt.getValue() != NON_EXISTENT_FILE)
00332     return NamedFileEnt.getValue();
00333 
00334   ++NumFileCacheMisses;
00335 
00336   // By default, initialize it to invalid.
00337   NamedFileEnt.setValue(NON_EXISTENT_FILE);
00338 
00339   addAncestorsAsVirtualDirs(Filename);
00340   FileEntry *UFE = nullptr;
00341 
00342   // Now that all ancestors of Filename are in the cache, the
00343   // following call is guaranteed to find the DirectoryEntry from the
00344   // cache.
00345   const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
00346                                                        /*CacheFailure=*/true);
00347   assert(DirInfo &&
00348          "The directory of a virtual file should already be in the cache.");
00349 
00350   // Check to see if the file exists. If so, drop the virtual file
00351   FileData Data;
00352   const char *InterndFileName = NamedFileEnt.getKeyData();
00353   if (getStatValue(InterndFileName, Data, true, nullptr) == 0) {
00354     Data.Size = Size;
00355     Data.ModTime = ModificationTime;
00356     UFE = &UniqueRealFiles[Data.UniqueID];
00357 
00358     NamedFileEnt.setValue(UFE);
00359 
00360     // If we had already opened this file, close it now so we don't
00361     // leak the descriptor. We're not going to use the file
00362     // descriptor anyway, since this is a virtual file.
00363     if (UFE->File)
00364       UFE->closeFile();
00365 
00366     // If we already have an entry with this inode, return it.
00367     if (UFE->isValid())
00368       return UFE;
00369 
00370     UFE->UniqueID = Data.UniqueID;
00371     UFE->IsNamedPipe = Data.IsNamedPipe;
00372     UFE->InPCH = Data.InPCH;
00373   }
00374 
00375   if (!UFE) {
00376     UFE = new FileEntry();
00377     VirtualFileEntries.push_back(UFE);
00378     NamedFileEnt.setValue(UFE);
00379   }
00380 
00381   UFE->Name    = InterndFileName;
00382   UFE->Size    = Size;
00383   UFE->ModTime = ModificationTime;
00384   UFE->Dir     = DirInfo;
00385   UFE->UID     = NextFileUID++;
00386   UFE->File.reset();
00387   return UFE;
00388 }
00389 
00390 void FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const {
00391   StringRef pathRef(path.data(), path.size());
00392 
00393   if (FileSystemOpts.WorkingDir.empty() 
00394       || llvm::sys::path::is_absolute(pathRef))
00395     return;
00396 
00397   SmallString<128> NewPath(FileSystemOpts.WorkingDir);
00398   llvm::sys::path::append(NewPath, pathRef);
00399   path = NewPath;
00400 }
00401 
00402 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
00403 FileManager::getBufferForFile(const FileEntry *Entry, bool isVolatile,
00404                               bool ShouldCloseOpenFile) {
00405   uint64_t FileSize = Entry->getSize();
00406   // If there's a high enough chance that the file have changed since we
00407   // got its size, force a stat before opening it.
00408   if (isVolatile)
00409     FileSize = -1;
00410 
00411   const char *Filename = Entry->getName();
00412   // If the file is already open, use the open file descriptor.
00413   if (Entry->File) {
00414     auto Result =
00415         Entry->File->getBuffer(Filename, FileSize,
00416                                /*RequiresNullTerminator=*/true, isVolatile);
00417     // FIXME: we need a set of APIs that can make guarantees about whether a
00418     // FileEntry is open or not.
00419     if (ShouldCloseOpenFile)
00420       Entry->closeFile();
00421     return Result;
00422   }
00423 
00424   // Otherwise, open the file.
00425 
00426   if (FileSystemOpts.WorkingDir.empty())
00427     return FS->getBufferForFile(Filename, FileSize,
00428                                 /*RequiresNullTerminator=*/true, isVolatile);
00429 
00430   SmallString<128> FilePath(Entry->getName());
00431   FixupRelativePath(FilePath);
00432   return FS->getBufferForFile(FilePath.str(), FileSize,
00433                               /*RequiresNullTerminator=*/true, isVolatile);
00434 }
00435 
00436 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
00437 FileManager::getBufferForFile(StringRef Filename) {
00438   if (FileSystemOpts.WorkingDir.empty())
00439     return FS->getBufferForFile(Filename);
00440 
00441   SmallString<128> FilePath(Filename);
00442   FixupRelativePath(FilePath);
00443   return FS->getBufferForFile(FilePath.c_str());
00444 }
00445 
00446 /// getStatValue - Get the 'stat' information for the specified path,
00447 /// using the cache to accelerate it if possible.  This returns true
00448 /// if the path points to a virtual file or does not exist, or returns
00449 /// false if it's an existent real file.  If FileDescriptor is NULL,
00450 /// do directory look-up instead of file look-up.
00451 bool FileManager::getStatValue(const char *Path, FileData &Data, bool isFile,
00452                                std::unique_ptr<vfs::File> *F) {
00453   // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
00454   // absolute!
00455   if (FileSystemOpts.WorkingDir.empty())
00456     return FileSystemStatCache::get(Path, Data, isFile, F,StatCache.get(), *FS);
00457 
00458   SmallString<128> FilePath(Path);
00459   FixupRelativePath(FilePath);
00460 
00461   return FileSystemStatCache::get(FilePath.c_str(), Data, isFile, F,
00462                                   StatCache.get(), *FS);
00463 }
00464 
00465 bool FileManager::getNoncachedStatValue(StringRef Path,
00466                                         vfs::Status &Result) {
00467   SmallString<128> FilePath(Path);
00468   FixupRelativePath(FilePath);
00469 
00470   llvm::ErrorOr<vfs::Status> S = FS->status(FilePath.c_str());
00471   if (!S)
00472     return true;
00473   Result = *S;
00474   return false;
00475 }
00476 
00477 void FileManager::invalidateCache(const FileEntry *Entry) {
00478   assert(Entry && "Cannot invalidate a NULL FileEntry");
00479 
00480   SeenFileEntries.erase(Entry->getName());
00481 
00482   // FileEntry invalidation should not block future optimizations in the file
00483   // caches. Possible alternatives are cache truncation (invalidate last N) or
00484   // invalidation of the whole cache.
00485   UniqueRealFiles.erase(Entry->getUniqueID());
00486 }
00487 
00488 
00489 void FileManager::GetUniqueIDMapping(
00490                    SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
00491   UIDToFiles.clear();
00492   UIDToFiles.resize(NextFileUID);
00493   
00494   // Map file entries
00495   for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
00496          FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
00497        FE != FEEnd; ++FE)
00498     if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
00499       UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
00500   
00501   // Map virtual file entries
00502   for (SmallVectorImpl<FileEntry *>::const_iterator
00503          VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end();
00504        VFE != VFEEnd; ++VFE)
00505     if (*VFE && *VFE != NON_EXISTENT_FILE)
00506       UIDToFiles[(*VFE)->getUID()] = *VFE;
00507 }
00508 
00509 void FileManager::modifyFileEntry(FileEntry *File,
00510                                   off_t Size, time_t ModificationTime) {
00511   File->Size = Size;
00512   File->ModTime = ModificationTime;
00513 }
00514 
00515 StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) {
00516   // FIXME: use llvm::sys::fs::canonical() when it gets implemented
00517 #ifdef LLVM_ON_UNIX
00518   llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known
00519     = CanonicalDirNames.find(Dir);
00520   if (Known != CanonicalDirNames.end())
00521     return Known->second;
00522 
00523   StringRef CanonicalName(Dir->getName());
00524   char CanonicalNameBuf[PATH_MAX];
00525   if (realpath(Dir->getName(), CanonicalNameBuf)) {
00526     unsigned Len = strlen(CanonicalNameBuf);
00527     char *Mem = static_cast<char *>(CanonicalNameStorage.Allocate(Len, 1));
00528     memcpy(Mem, CanonicalNameBuf, Len);
00529     CanonicalName = StringRef(Mem, Len);
00530   }
00531 
00532   CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName));
00533   return CanonicalName;
00534 #else
00535   return StringRef(Dir->getName());
00536 #endif
00537 }
00538 
00539 void FileManager::PrintStats() const {
00540   llvm::errs() << "\n*** File Manager Stats:\n";
00541   llvm::errs() << UniqueRealFiles.size() << " real files found, "
00542                << UniqueRealDirs.size() << " real dirs found.\n";
00543   llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
00544                << VirtualDirectoryEntries.size() << " virtual dirs found.\n";
00545   llvm::errs() << NumDirLookups << " dir lookups, "
00546                << NumDirCacheMisses << " dir cache misses.\n";
00547   llvm::errs() << NumFileLookups << " file lookups, "
00548                << NumFileCacheMisses << " file cache misses.\n";
00549 
00550   //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
00551 }