LLVM API Documentation

FileUtilities.cpp
Go to the documentation of this file.
00001 //===- Support/FileUtilities.cpp - File System Utilities ------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements a family of utility functions which are useful for doing
00011 // various things with files.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
#include "llvm/Support/FileUtilities.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <system_error>
00024 using namespace llvm;
00025 
/// isSignedChar - Return true if C is a sign character ('+' or '-').
static bool isSignedChar(char C) {
  switch (C) {
  case '+':
  case '-':
    return true;
  default:
    return false;
  }
}
00029 
/// isExponentChar - Return true if C can introduce an exponent.  Besides the
/// usual 'e'/'E', the "strange" 'd'/'D' exponential notation is accepted too.
static bool isExponentChar(char C) {
  const bool IsStandardMarker = (C == 'e' || C == 'E');
  const bool IsStrangeMarker = (C == 'd' || C == 'D');
  return IsStandardMarker || IsStrangeMarker;
}
00039 
00040 static bool isNumberChar(char C) {
00041   switch (C) {
00042   case '0': case '1': case '2': case '3': case '4':
00043   case '5': case '6': case '7': case '8': case '9':
00044   case '.': return true;
00045   default: return isSignedChar(C) || isExponentChar(C);
00046   }
00047 }
00048 
00049 static const char *BackupNumber(const char *Pos, const char *FirstChar) {
00050   // If we didn't stop in the middle of a number, don't backup.
00051   if (!isNumberChar(*Pos)) return Pos;
00052 
00053   // Otherwise, return to the start of the number.
00054   bool HasPeriod = false;
00055   while (Pos > FirstChar && isNumberChar(Pos[-1])) {
00056     // Backup over at most one period.
00057     if (Pos[-1] == '.') {
00058       if (HasPeriod)
00059         break;
00060       HasPeriod = true;
00061     }
00062 
00063     --Pos;
00064     if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
00065       break;
00066   }
00067   return Pos;
00068 }
00069 
00070 /// EndOfNumber - Return the first character that is not part of the specified
00071 /// number.  This assumes that the buffer is null terminated, so it won't fall
00072 /// off the end.
00073 static const char *EndOfNumber(const char *Pos) {
00074   while (isNumberChar(*Pos))
00075     ++Pos;
00076   return Pos;
00077 }
00078 
00079 /// CompareNumbers - compare two numbers, returning true if they are different.
00080 static bool CompareNumbers(const char *&F1P, const char *&F2P,
00081                            const char *F1End, const char *F2End,
00082                            double AbsTolerance, double RelTolerance,
00083                            std::string *ErrorMsg) {
00084   const char *F1NumEnd, *F2NumEnd;
00085   double V1 = 0.0, V2 = 0.0;
00086 
00087   // If one of the positions is at a space and the other isn't, chomp up 'til
00088   // the end of the space.
00089   while (isspace(static_cast<unsigned char>(*F1P)) && F1P != F1End)
00090     ++F1P;
00091   while (isspace(static_cast<unsigned char>(*F2P)) && F2P != F2End)
00092     ++F2P;
00093 
00094   // If we stop on numbers, compare their difference.
00095   if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) {
00096     // The diff failed.
00097     F1NumEnd = F1P;
00098     F2NumEnd = F2P;
00099   } else {
00100     // Note that some ugliness is built into this to permit support for numbers
00101     // that use "D" or "d" as their exponential marker, e.g. "1.234D45".  This
00102     // occurs in 200.sixtrack in spec2k.
00103     V1 = strtod(F1P, const_cast<char**>(&F1NumEnd));
00104     V2 = strtod(F2P, const_cast<char**>(&F2NumEnd));
00105 
00106     if (*F1NumEnd == 'D' || *F1NumEnd == 'd') {
00107       // Copy string into tmp buffer to replace the 'D' with an 'e'.
00108       SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
00109       // Strange exponential notation!
00110       StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
00111 
00112       V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
00113       F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
00114     }
00115 
00116     if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
00117       // Copy string into tmp buffer to replace the 'D' with an 'e'.
00118       SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
00119       // Strange exponential notation!
00120       StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
00121 
00122       V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
00123       F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
00124     }
00125   }
00126 
00127   if (F1NumEnd == F1P || F2NumEnd == F2P) {
00128     if (ErrorMsg) {
00129       *ErrorMsg = "FP Comparison failed, not a numeric difference between '";
00130       *ErrorMsg += F1P[0];
00131       *ErrorMsg += "' and '";
00132       *ErrorMsg += F2P[0];
00133       *ErrorMsg += "'";
00134     }
00135     return true;
00136   }
00137 
00138   // Check to see if these are inside the absolute tolerance
00139   if (AbsTolerance < std::abs(V1-V2)) {
00140     // Nope, check the relative tolerance...
00141     double Diff;
00142     if (V2)
00143       Diff = std::abs(V1/V2 - 1.0);
00144     else if (V1)
00145       Diff = std::abs(V2/V1 - 1.0);
00146     else
00147       Diff = 0;  // Both zero.
00148     if (Diff > RelTolerance) {
00149       if (ErrorMsg) {
00150         raw_string_ostream(*ErrorMsg)
00151           << "Compared: " << V1 << " and " << V2 << '\n'
00152           << "abs. diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n'
00153           << "Out of tolerance: rel/abs: " << RelTolerance << '/'
00154           << AbsTolerance;
00155       }
00156       return true;
00157     }
00158   }
00159 
00160   // Otherwise, advance our read pointers to the end of the numbers.
00161   F1P = F1NumEnd;  F2P = F2NumEnd;
00162   return false;
00163 }
00164 
00165 /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
00166 /// files match, 1 if they are different, and 2 if there is a file error.  This
00167 /// function differs from DiffFiles in that you can specify an absolete and
00168 /// relative FP error that is allowed to exist.  If you specify a string to fill
00169 /// in for the error option, it will set the string to an error message if an
00170 /// error occurs, allowing the caller to distinguish between a failed diff and a
00171 /// file system error.
00172 ///
00173 int llvm::DiffFilesWithTolerance(StringRef NameA,
00174                                  StringRef NameB,
00175                                  double AbsTol, double RelTol,
00176                                  std::string *Error) {
00177   // Now its safe to mmap the files into memory because both files
00178   // have a non-zero size.
00179   ErrorOr<std::unique_ptr<MemoryBuffer>> F1OrErr = MemoryBuffer::getFile(NameA);
00180   if (std::error_code EC = F1OrErr.getError()) {
00181     if (Error)
00182       *Error = EC.message();
00183     return 2;
00184   }
00185   MemoryBuffer &F1 = *F1OrErr.get();
00186 
00187   ErrorOr<std::unique_ptr<MemoryBuffer>> F2OrErr = MemoryBuffer::getFile(NameB);
00188   if (std::error_code EC = F2OrErr.getError()) {
00189     if (Error)
00190       *Error = EC.message();
00191     return 2;
00192   }
00193   MemoryBuffer &F2 = *F2OrErr.get();
00194 
00195   // Okay, now that we opened the files, scan them for the first difference.
00196   const char *File1Start = F1.getBufferStart();
00197   const char *File2Start = F2.getBufferStart();
00198   const char *File1End = F1.getBufferEnd();
00199   const char *File2End = F2.getBufferEnd();
00200   const char *F1P = File1Start;
00201   const char *F2P = File2Start;
00202   uint64_t A_size = F1.getBufferSize();
00203   uint64_t B_size = F2.getBufferSize();
00204 
00205   // Are the buffers identical?  Common case: Handle this efficiently.
00206   if (A_size == B_size &&
00207       std::memcmp(File1Start, File2Start, A_size) == 0)
00208     return 0;
00209 
00210   // Otherwise, we are done a tolerances are set.
00211   if (AbsTol == 0 && RelTol == 0) {
00212     if (Error)
00213       *Error = "Files differ without tolerance allowance";
00214     return 1;   // Files different!
00215   }
00216 
00217   bool CompareFailed = false;
00218   while (1) {
00219     // Scan for the end of file or next difference.
00220     while (F1P < File1End && F2P < File2End && *F1P == *F2P)
00221       ++F1P, ++F2P;
00222 
00223     if (F1P >= File1End || F2P >= File2End) break;
00224 
00225     // Okay, we must have found a difference.  Backup to the start of the
00226     // current number each stream is at so that we can compare from the
00227     // beginning.
00228     F1P = BackupNumber(F1P, File1Start);
00229     F2P = BackupNumber(F2P, File2Start);
00230 
00231     // Now that we are at the start of the numbers, compare them, exiting if
00232     // they don't match.
00233     if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) {
00234       CompareFailed = true;
00235       break;
00236     }
00237   }
00238 
00239   // Okay, we reached the end of file.  If both files are at the end, we
00240   // succeeded.
00241   bool F1AtEnd = F1P >= File1End;
00242   bool F2AtEnd = F2P >= File2End;
00243   if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) {
00244     // Else, we might have run off the end due to a number: backup and retry.
00245     if (F1AtEnd && isNumberChar(F1P[-1])) --F1P;
00246     if (F2AtEnd && isNumberChar(F2P[-1])) --F2P;
00247     F1P = BackupNumber(F1P, File1Start);
00248     F2P = BackupNumber(F2P, File2Start);
00249 
00250     // Now that we are at the start of the numbers, compare them, exiting if
00251     // they don't match.
00252     if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error))
00253       CompareFailed = true;
00254 
00255     // If we found the end, we succeeded.
00256     if (F1P < File1End || F2P < File2End)
00257       CompareFailed = true;
00258   }
00259 
00260   return CompareFailed;
00261 }