SuperTux: src/lisp/lexer.cpp Source File

00001 //  SuperTux
00002 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
00003 //
00004 //  This program is free software: you can redistribute it and/or modify
00005 //  it under the terms of the GNU General Public License as published by
00006 //  the Free Software Foundation, either version 3 of the License, or
00007 //  (at your option) any later version.
00008 //
00009 //  This program is distributed in the hope that it will be useful,
00010 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 //  GNU General Public License for more details.
00013 //
00014 //  You should have received a copy of the GNU General Public License
00015 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016 
00017 #include "lisp/lexer.hpp"
00018 
00019 #include <string.h>
00020 #include <sstream>
00021 #include <stdexcept>
00022 #include <stdio.h>
00023 
00024 namespace lisp {
00025 
00026 Lexer::Lexer(std::istream& newstream) :
00027   stream(newstream), 
00028   eof(false), 
00029   linenumber(0),
00030   bufend(),
00031   bufpos(),
00032   c(),
00033   token_length()
00034 {
00035   // trigger a refill of the buffer
00036   bufpos = NULL;
00037   bufend = NULL;
00038   nextChar();
00039 }
00040 
00041 Lexer::~Lexer()
00042 {
00043 }
00044 
00045 void
00046 Lexer::nextChar()
00047 {
00048   if(bufpos >= bufend) {
00049     if(eof) {
00050       c = EOF;
00051       return;
00052     }
00053     stream.read(buffer, BUFFER_SIZE);
00054     size_t bytes_read = stream.gcount();
00055 
00056     bufpos = buffer;
00057     bufend = buffer + bytes_read;
00058 
00059     // the following is a hack that appends an additional ' ' at the end of
00060     // the file to avoid problems when parsing symbols/elements and a sudden
00061     // EOF. This is faster than relying on unget and IMO also nicer.
00062     if(bytes_read == 0 || stream.eof()) {
00063       eof = true;
00064       *bufend = ' ';
00065       ++bufend;
00066     }
00067   }
00068   c = *bufpos++;
00069   if(c == '\n')
00070     ++linenumber;
00071 }
00072 
00073 void
00074 Lexer::addChar()
00075 {
00076   if(token_length < MAX_TOKEN_LENGTH)
00077     token_string[token_length++] = c;
00078   nextChar();
00079 }
00080 
00081 Lexer::TokenType
00082 Lexer::getNextToken()
00083 {
00084   static const char* delims = "\"();";
00085 
00086   while(isspace(c)) {
00087     nextChar();
00088   }
00089 
00090   token_length = 0;
00091 
00092   switch(c) {
00093     case ';': // comment
00094       while(c != '\n') {
00095         nextChar();
00096       }
00097       return getNextToken(); // and again
00098     case '(':
00099       nextChar();
00100       return TOKEN_OPEN_PAREN;
00101     case ')':
00102       nextChar();
00103       return TOKEN_CLOSE_PAREN;
00104     case '"': {  // string
00105       int startline = linenumber;
00106       while(1) {
00107         nextChar();
00108         switch(c) {
00109           case '"':
00110             nextChar();
00111             goto string_finished;
00112           case '\r':
00113             continue;
00114           case '\n':
00115             break;
00116           case '\\':
00117             nextChar();
00118             switch(c) {
00119               case 'n':
00120                 c = '\n';
00121                 break;
00122               case 't':
00123                 c = '\t';
00124                 break;
00125             }
00126             break;
00127           case EOF: {
00128             std::stringstream msg;
00129             msg << "Parse error in line " << startline << ": "
00130                 << "EOF while parsing string.";
00131             throw std::runtime_error(msg.str());
00132           }
00133           default:
00134             break;
00135         }
00136         if(token_length < MAX_TOKEN_LENGTH)
00137           token_string[token_length++] = c;
00138       }
00139       string_finished:
00140       token_string[token_length] = 0;
00141       return TOKEN_STRING;
00142     }
00143     case '#': // constant
00144       nextChar();
00145 
00146       while(isalnum(c) || c == '_') {
00147         addChar();
00148       }
00149       token_string[token_length] = 0;
00150 
00151       if(strcmp(token_string, "t") == 0)
00152         return TOKEN_TRUE;
00153       if(strcmp(token_string, "f") == 0)
00154         return TOKEN_FALSE;
00155 
00156       // we only handle #t and #f constants at the moment...
00157       {
00158         std::stringstream msg;
00159         msg << "Parse Error in line " << linenumber << ": "
00160             << "Unknown constant '" << token_string << "'.";
00161         throw std::runtime_error(msg.str());
00162       }
00163 
00164     case EOF:
00165       return TOKEN_EOF;
00166 
00167     default:
00168       if(isdigit(c) || c == '-') {
00169         bool have_nondigits = false;
00170         bool have_digits = false;
00171         int have_floating_point = 0;
00172 
00173         do {
00174           if(isdigit(c))
00175             have_digits = true;
00176           else if(c == '.')
00177             ++have_floating_point;
00178           else if(isalnum(c) || c == '_')
00179             have_nondigits = true;
00180 
00181           addChar();
00182         } while(!isspace(c) && !strchr(delims, c));
00183 
00184         token_string[token_length] = 0;
00185 
00186         // no nextChar
00187 
00188         if(have_nondigits || !have_digits || have_floating_point > 1)
00189           return TOKEN_SYMBOL;
00190         else if(have_floating_point == 1)
00191           return TOKEN_REAL;
00192         else
00193           return TOKEN_INTEGER;
00194       } else {
00195         do {
00196           addChar();
00197         } while(!isspace(c) && !strchr(delims, c));
00198         token_string[token_length] = 0;
00199 
00200         // no nextChar
00201 
00202         return TOKEN_SYMBOL;
00203       }
00204   }
00205 }
00206 
00207 } // end of namespace lisp
00208 
00209 /* EOF */