LLVM API Documentation
00001 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the class that reads LLVM sample profiles. It 00011 // supports two file formats: text and bitcode. The textual representation 00012 // is useful for debugging and testing purposes. The bitcode representation 00013 // is more compact, resulting in smaller file sizes. However, they can 00014 // both be used interchangeably. 00015 // 00016 // NOTE: If you are making changes to the file format, please remember 00017 // to document them in the Clang documentation at 00018 // tools/clang/docs/UsersManual.rst. 00019 // 00020 // Text format 00021 // ----------- 00022 // 00023 // Sample profiles are written as ASCII text. The file is divided into 00024 // sections, which correspond to each of the functions executed at runtime. 00025 // Each section has the following format 00026 // 00027 // function1:total_samples:total_head_samples 00028 // offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] 00029 // offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] 00030 // ... 00031 // offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] 00032 // 00033 // The file may contain blank lines between sections and within a 00034 // section. However, the spacing within a single line is fixed. Additional 00035 // spaces will result in an error while reading the file. 00036 // 00037 // Function names must be mangled in order for the profile loader to 00038 // match them in the current translation unit. 
The two numbers in the 00039 // function header specify how many total samples were accumulated in the 00040 // function (first number), and the total number of samples accumulated 00041 // in the prologue of the function (second number). This head sample 00042 // count provides an indicator of how frequently the function is invoked. 00043 // 00044 // Each sampled line may contain several items. Some are optional (marked 00045 // below): 00046 // 00047 // a. Source line offset. This number represents the line number 00048 // in the function where the sample was collected. The line number is 00049 // always relative to the line where symbol of the function is 00050 // defined. So, if the function has its header at line 280, the offset 00051 // 13 is at line 293 in the file. 00052 // 00053 // Note that this offset should never be a negative number. This could 00054 // happen in cases like macros. The debug machinery will register the 00055 // line number at the point of macro expansion. So, if the macro was 00056 // expanded in a line before the start of the function, the profile 00057 // converter should emit a 0 as the offset (this means that the optimizers 00058 // will not be able to associate a meaningful weight to the instructions 00059 // in the macro). 00060 // 00061 // b. [OPTIONAL] Discriminator. This is used if the sampled program 00062 // was compiled with DWARF discriminator support 00063 // (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). 00064 // DWARF discriminators are unsigned integer values that allow the 00065 // compiler to distinguish between multiple execution paths on the 00066 // same source line location. 00067 // 00068 // For example, consider the line of code ``if (cond) foo(); else bar();``. 00069 // If the predicate ``cond`` is true 80% of the time, then the edge 00070 // into function ``foo`` should be considered to be taken most of the 00071 // time. 
But both calls to ``foo`` and ``bar`` are at the same source 00072 // line, so a sample count at that line is not sufficient. The 00073 // compiler needs to know which part of that line is taken more 00074 // frequently. 00075 // 00076 // This is what discriminators provide. In this case, the calls to 00077 // ``foo`` and ``bar`` will be at the same line, but will have 00078 // different discriminator values. This allows the compiler to correctly 00079 // set edge weights into ``foo`` and ``bar``. 00080 // 00081 // c. Number of samples. This is an integer quantity representing the 00082 // number of samples collected by the profiler at this source 00083 // location. 00084 // 00085 // d. [OPTIONAL] Potential call targets and samples. If present, this 00086 // line contains a call instruction. This models both direct and 00087 // indirect calls. Each called target is listed together with its number of samples. For example, 00088 // 00089 // 130: 7 foo:3 bar:2 baz:7 00090 // 00091 // The above means that at relative line offset 130 there is a call 00092 // instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, 00093 // with ``baz()`` being the relatively more frequently called target. 00094 // 00095 //===----------------------------------------------------------------------===// 00096 00097 #include "llvm/ProfileData/SampleProfReader.h" 00098 #include "llvm/Support/Debug.h" 00099 #include "llvm/Support/ErrorOr.h" 00100 #include "llvm/Support/MemoryBuffer.h" 00101 #include "llvm/Support/LineIterator.h" 00102 #include "llvm/Support/Regex.h" 00103 00104 using namespace sampleprof; 00105 using namespace llvm; 00106 00107 /// \brief Print the samples collected for a function on stream \p OS. 00108 /// 00109 /// \param OS Stream to emit the output to. 
00110 void FunctionSamples::print(raw_ostream &OS) { 00111 OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() 00112 << " sampled lines\n"; 00113 for (BodySampleMap::const_iterator SI = BodySamples.begin(), 00114 SE = BodySamples.end(); 00115 SI != SE; ++SI) 00116 OS << "\tline offset: " << SI->first.LineOffset 00117 << ", discriminator: " << SI->first.Discriminator 00118 << ", number of samples: " << SI->second << "\n"; 00119 OS << "\n"; 00120 } 00121 00122 /// \brief Print the function profile for \p FName on stream \p OS. 00123 /// 00124 /// \param OS Stream to emit the output to. 00125 /// \param FName Name of the function to print. 00126 void SampleProfileReader::printFunctionProfile(raw_ostream &OS, 00127 StringRef FName) { 00128 OS << "Function: " << FName << ":\n"; 00129 Profiles[FName].print(OS); 00130 } 00131 00132 /// \brief Dump the function profile for \p FName. 00133 /// 00134 /// \param FName Name of the function to print. 00135 void SampleProfileReader::dumpFunctionProfile(StringRef FName) { 00136 printFunctionProfile(dbgs(), FName); 00137 } 00138 00139 /// \brief Dump all the function profiles found. 00140 void SampleProfileReader::dump() { 00141 for (StringMap<FunctionSamples>::const_iterator I = Profiles.begin(), 00142 E = Profiles.end(); 00143 I != E; ++I) 00144 dumpFunctionProfile(I->getKey()); 00145 } 00146 00147 /// \brief Load samples from a text file. 00148 /// 00149 /// See the documentation at the top of the file for an explanation of 00150 /// the expected format. 00151 /// 00152 /// \returns true if the file was loaded successfully, false otherwise. 
00153 bool SampleProfileReader::loadText() { 00154 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 00155 MemoryBuffer::getFile(Filename); 00156 if (std::error_code EC = BufferOrErr.getError()) { 00157 std::string Msg(EC.message()); 00158 M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg)); 00159 return false; 00160 } 00161 MemoryBuffer &Buffer = *BufferOrErr.get(); 00162 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 00163 00164 // Read the profile of each function. Since each function may be 00165 // mentioned more than once, and we are collecting flat profiles, 00166 // accumulate samples as we parse them. 00167 Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); 00168 Regex LineSample("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); 00169 while (!LineIt.is_at_eof()) { 00170 // Read the header of each function. 00171 // 00172 // Note that for function identifiers we are actually expecting 00173 // mangled names, but we may not always get them. This happens when 00174 // the compiler decides not to emit the function (e.g., it was inlined 00175 // and removed). In this case, the binary will not have the linkage 00176 // name for the function, so the profiler will emit the function's 00177 // unmangled name, which may contain characters like ':' and '>' in its 00178 // name (member functions, templates, etc). 00179 // 00180 // The only requirement we place on the identifier, then, is that it 00181 // should not begin with a number. 
00182 SmallVector<StringRef, 3> Matches; 00183 if (!HeadRE.match(*LineIt, &Matches)) { 00184 reportParseError(LineIt.line_number(), 00185 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 00186 return false; 00187 } 00188 assert(Matches.size() == 4); 00189 StringRef FName = Matches[1]; 00190 unsigned NumSamples, NumHeadSamples; 00191 Matches[2].getAsInteger(10, NumSamples); 00192 Matches[3].getAsInteger(10, NumHeadSamples); 00193 Profiles[FName] = FunctionSamples(); 00194 FunctionSamples &FProfile = Profiles[FName]; 00195 FProfile.addTotalSamples(NumSamples); 00196 FProfile.addHeadSamples(NumHeadSamples); 00197 ++LineIt; 00198 00199 // Now read the body. The body of the function ends when we reach 00200 // EOF or when we see the start of the next function. 00201 while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { 00202 if (!LineSample.match(*LineIt, &Matches)) { 00203 reportParseError( 00204 LineIt.line_number(), 00205 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); 00206 return false; 00207 } 00208 assert(Matches.size() == 5); 00209 unsigned LineOffset, NumSamples, Discriminator = 0; 00210 Matches[1].getAsInteger(10, LineOffset); 00211 if (Matches[2] != "") 00212 Matches[2].getAsInteger(10, Discriminator); 00213 Matches[3].getAsInteger(10, NumSamples); 00214 00215 // FIXME: Handle called targets (in Matches[4]). 00216 00217 // When dealing with instruction weights, we use the value 00218 // zero to indicate the absence of a sample. If we read an 00219 // actual zero from the profile file, return it as 1 to 00220 // avoid the confusion later on. 00221 if (NumSamples == 0) 00222 NumSamples = 1; 00223 FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); 00224 ++LineIt; 00225 } 00226 } 00227 00228 return true; 00229 } 00230 00231 /// \brief Load execution samples from a file. 00232 /// 00233 /// This function examines the header of the given file to determine 00234 /// whether to use the text or the bitcode loader. 
00235 bool SampleProfileReader::load() { 00236 // TODO Actually detect the file format. 00237 return loadText(); 00238 }