Ninja
lexer.in.cc
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //     http://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
00015 #include "lexer.h"
00016 
00017 #include <stdio.h>
00018 
00019 #include "eval_env.h"
00020 #include "util.h"
00021 
00022 bool Lexer::Error(const string& message, string* err) {
00023   // Compute line/column.
00024   int line = 1;
00025   const char* context = input_.str_;
00026   for (const char* p = input_.str_; p < last_token_; ++p) {
00027     if (*p == '\n') {
00028       ++line;
00029       context = p + 1;
00030     }
00031   }
00032   int col = last_token_ ? (int)(last_token_ - context) : 0;
00033 
00034   char buf[1024];
00035   snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
00036   *err = buf;
00037   *err += message + "\n";
00038 
00039   // Add some context to the message.
00040   const int kTruncateColumn = 72;
00041   if (col > 0 && col < kTruncateColumn) {
00042     int len;
00043     bool truncated = true;
00044     for (len = 0; len < kTruncateColumn; ++len) {
00045       if (context[len] == 0 || context[len] == '\n') {
00046         truncated = false;
00047         break;
00048       }
00049     }
00050     *err += string(context, len);
00051     if (truncated)
00052       *err += "...";
00053     *err += "\n";
00054     *err += string(col, ' ');
00055     *err += "^ near here";
00056   }
00057 
00058   return false;
00059 }
00060 
// Convenience constructor: lex from an in-memory, NUL-terminated string,
// reporting errors against the placeholder filename "input".
Lexer::Lexer(const char* input) {
  Start("input", input);
}
00064 
00065 void Lexer::Start(StringPiece filename, StringPiece input) {
00066   filename_ = filename;
00067   input_ = input;
00068   ofs_ = input_.str_;
00069   last_token_ = NULL;
00070 }
00071 
00072 const char* Lexer::TokenName(Token t) {
00073   switch (t) {
00074   case ERROR:    return "lexing error";
00075   case BUILD:    return "'build'";
00076   case COLON:    return "':'";
00077   case DEFAULT:  return "'default'";
00078   case EQUALS:   return "'='";
00079   case IDENT:    return "identifier";
00080   case INCLUDE:  return "'include'";
00081   case INDENT:   return "indent";
00082   case NEWLINE:  return "newline";
00083   case PIPE2:    return "'||'";
00084   case PIPE:     return "'|'";
00085   case POOL:     return "'pool'";
00086   case RULE:     return "'rule'";
00087   case SUBNINJA: return "'subninja'";
00088   case TEOF:     return "eof";
00089   }
00090   return NULL;  // not reached
00091 }
00092 
00093 const char* Lexer::TokenErrorHint(Token expected) {
00094   switch (expected) {
00095   case COLON:
00096     return " ($ also escapes ':')";
00097   default:
00098     return "";
00099   }
00100 }
00101 
00102 string Lexer::DescribeLastError() {
00103   if (last_token_) {
00104     switch (last_token_[0]) {
00105     case '\r':
00106       return "carriage returns are not allowed, use newlines";
00107     case '\t':
00108       return "tabs are not allowed, use spaces";
00109     }
00110   }
00111   return "lexing error";
00112 }
00113 
// Rewind so the most recently read token is returned again by the next
// ReadToken() call.  Only a single token of lookback is supported.
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}
00117 
// Lex and return the next token, advancing ofs_ past it and recording the
// token's start in last_token_ (for UnreadToken/Error).  The /*!re2c*/
// block below is not an ordinary comment: re2c compiles it into the
// scanner when generating lexer.cc, so its contents must not be edited
// without re-running re2c.
Lexer::Token Lexer::ReadToken() {
  const char* p = ofs_;
  const char* q;      // backtracking marker used by the generated scanner
  const char* start;  // first byte of the token currently being matched
  Lexer::Token token;
  // Loop so that comment lines ("continue" actions) are skipped without
  // producing a token.
  for (;;) {
    start = p;
    /*!re2c
    re2c:define:YYCTYPE = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:define:YYMARKER = q;
    re2c:yyfill:enable = 0;

    nul = "\000";
    simple_varname = [a-zA-Z0-9_-]+;
    varname = [a-zA-Z0-9_.-]+;

    [ ]*"#"[^\000\r\n]*"\n" { continue; }
    [ ]*[\n]   { token = NEWLINE;  break; }
    [ ]+       { token = INDENT;   break; }
    "build"    { token = BUILD;    break; }
    "pool"     { token = POOL;     break; }
    "rule"     { token = RULE;     break; }
    "default"  { token = DEFAULT;  break; }
    "="        { token = EQUALS;   break; }
    ":"        { token = COLON;    break; }
    "||"       { token = PIPE2;    break; }
    "|"        { token = PIPE;     break; }
    "include"  { token = INCLUDE;  break; }
    "subninja" { token = SUBNINJA; break; }
    varname    { token = IDENT;    break; }
    nul        { token = TEOF;     break; }
    [^]        { token = ERROR;    break; }
    */
  }

  last_token_ = start;
  ofs_ = p;
  // Most tokens may be followed by insignificant whitespace (including
  // "$\n" line continuations); skip it now so the next read starts clean.
  if (token != NEWLINE && token != TEOF)
    EatWhitespace();
  return token;
}
00160 
00161 bool Lexer::PeekToken(Token token) {
00162   Token t = ReadToken();
00163   if (t == token)
00164     return true;
00165   UnreadToken();
00166   return false;
00167 }
00168 
// Skip over runs of spaces and "$\n" line continuations starting at ofs_.
// The /*!re2c*/ block is generator input compiled into lexer.cc by re2c.
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  for (;;) {
    // ofs_ is committed before each match, so when a terminating byte
    // (NUL or any non-whitespace) is seen, it is left unconsumed.
    ofs_ = p;
    /*!re2c
    [ ]+  { continue; }
    "$\n" { continue; }
    nul   { break; }
    [^]   { break; }
    */
  }
}
00181 
// Read an identifier matching varname ([a-zA-Z0-9_.-]+) into *out and skip
// any trailing whitespace.  Returns false (leaving ofs_ unchanged) when the
// next input is not an identifier.  The /*!re2c*/ block is re2c input.
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  for (;;) {
    const char* start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] { return false; }
    */
  }
  ofs_ = p;
  EatWhitespace();
  return true;
}
00198 
// Read a ninja value into *eval, recording literal text via AddText and
// $var / ${var} references via AddSpecial for later evaluation.  When
// |path| is true, reading stops at an unescaped ' ', ':', '|', or newline
// (the terminator is left unconsumed); otherwise it stops at the newline.
// Handles the $-escapes: "$$", "$ ", "$:", and "$\n" line continuation.
// Returns false and fills *err on a lexing error or unexpected EOF.
// The /*!re2c*/ block is generator input compiled into lexer.cc by re2c.
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    [ :|\n] {
      if (path) {
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}