lisp::Lexer Class Reference

#include <lexer.hpp>

List of all members.

Public Types

enum  TokenType {
  TOKEN_EOF, TOKEN_OPEN_PAREN, TOKEN_CLOSE_PAREN, TOKEN_SYMBOL,
  TOKEN_STRING, TOKEN_INTEGER, TOKEN_REAL, TOKEN_TRUE,
  TOKEN_FALSE
}

Public Member Functions

 Lexer (std::istream &stream)
 ~Lexer ()
TokenType getNextToken ()
const char * getString () const
int getLineNumber () const

Private Types

enum  { MAX_TOKEN_LENGTH = 16384, BUFFER_SIZE = 1024 }

Private Member Functions

void nextChar ()
void addChar ()
 Lexer (const Lexer &)
Lexer & operator= (const Lexer &)

Private Attributes

std::istream & stream
bool eof
int linenumber
char buffer [BUFFER_SIZE+1]
char * bufend
char * bufpos
int c
char token_string [MAX_TOKEN_LENGTH+1]
int token_length


Detailed Description

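Tokenizer that reads characters from a std::istream and returns one token per call to getNextToken(): parentheses, symbols, strings, integers, reals, and the #t / #f constants. The text of the most recent symbol, string, numeric, or constant token is available through getString(); getLineNumber() returns the number of newline characters read so far.

Definition at line 24 of file lexer.hpp.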


Member Enumeration Documentation

enum lisp::Lexer::TokenType

Enumerator:
TOKEN_EOF 
TOKEN_OPEN_PAREN 
TOKEN_CLOSE_PAREN 
TOKEN_SYMBOL 
TOKEN_STRING 
TOKEN_INTEGER 
TOKEN_REAL 
TOKEN_TRUE 
TOKEN_FALSE 

Definition at line 27 of file lexer.hpp.

00027                  {
00028     TOKEN_EOF,
00029     TOKEN_OPEN_PAREN,
00030     TOKEN_CLOSE_PAREN,
00031     TOKEN_SYMBOL,
00032     TOKEN_STRING,
00033     TOKEN_INTEGER,
00034     TOKEN_REAL,
00035     TOKEN_TRUE,
00036     TOKEN_FALSE
00037   };

anonymous enum [private]

Enumerator:
MAX_TOKEN_LENGTH 
BUFFER_SIZE 

Definition at line 49 of file lexer.hpp.

00049        {
00050     MAX_TOKEN_LENGTH = 16384,
00051     BUFFER_SIZE = 1024
00052   };


Constructor & Destructor Documentation

lisp::Lexer::Lexer ( std::istream &  stream  ) 

Definition at line 26 of file lexer.cpp.

References bufend, bufpos, and nextChar().

00026                                   :
00027   stream(newstream), 
00028   eof(false), 
00029   linenumber(0),
00030   bufend(),
00031   bufpos(),
00032   c(),
00033   token_length()
00034 {
00035   // trigger a refill of the buffer
00036   bufpos = NULL;
00037   bufend = NULL;
00038   nextChar();
00039 }

lisp::Lexer::~Lexer (  ) 

Definition at line 41 of file lexer.cpp.

00042 {
00043 }

lisp::Lexer::Lexer ( const Lexer &  )  [private]


Member Function Documentation

Lexer::TokenType lisp::Lexer::getNextToken (  ) 

Definition at line 82 of file lexer.cpp.

References addChar(), c, linenumber, MAX_TOKEN_LENGTH, nextChar(), TOKEN_CLOSE_PAREN, TOKEN_EOF, TOKEN_FALSE, TOKEN_INTEGER, token_length, TOKEN_OPEN_PAREN, TOKEN_REAL, TOKEN_STRING, token_string, TOKEN_SYMBOL, and TOKEN_TRUE.

Referenced by lisp::Parser::parse(), and lisp::Parser::read().

00083 {
00084   static const char* delims = "\"();";
00085 
00086   while(isspace(c)) {
00087     nextChar();
00088   }
00089 
00090   token_length = 0;
00091 
00092   switch(c) {
00093     case ';': // comment
00094       while(c != '\n') {
00095         nextChar();
00096       }
00097       return getNextToken(); // and again
00098     case '(':
00099       nextChar();
00100       return TOKEN_OPEN_PAREN;
00101     case ')':
00102       nextChar();
00103       return TOKEN_CLOSE_PAREN;
00104     case '"': {  // string
00105       int startline = linenumber;
00106       while(1) {
00107         nextChar();
00108         switch(c) {
00109           case '"':
00110             nextChar();
00111             goto string_finished;
00112           case '\r':
00113             continue;
00114           case '\n':
00115             break;
00116           case '\\':
00117             nextChar();
00118             switch(c) {
00119               case 'n':
00120                 c = '\n';
00121                 break;
00122               case 't':
00123                 c = '\t';
00124                 break;
00125             }
00126             break;
00127           case EOF: {
00128             std::stringstream msg;
00129             msg << "Parse error in line " << startline << ": "
00130                 << "EOF while parsing string.";
00131             throw std::runtime_error(msg.str());
00132           }
00133           default:
00134             break;
00135         }
00136         if(token_length < MAX_TOKEN_LENGTH)
00137           token_string[token_length++] = c;
00138       }
00139       string_finished:
00140       token_string[token_length] = 0;
00141       return TOKEN_STRING;
00142     }
00143     case '#': // constant
00144       nextChar();
00145 
00146       while(isalnum(c) || c == '_') {
00147         addChar();
00148       }
00149       token_string[token_length] = 0;
00150 
00151       if(strcmp(token_string, "t") == 0)
00152         return TOKEN_TRUE;
00153       if(strcmp(token_string, "f") == 0)
00154         return TOKEN_FALSE;
00155 
00156       // we only handle #t and #f constants at the moment...
00157       {
00158         std::stringstream msg;
00159         msg << "Parse Error in line " << linenumber << ": "
00160             << "Unknown constant '" << token_string << "'.";
00161         throw std::runtime_error(msg.str());
00162       }
00163 
00164     case EOF:
00165       return TOKEN_EOF;
00166 
00167     default:
00168       if(isdigit(c) || c == '-') {
00169         bool have_nondigits = false;
00170         bool have_digits = false;
00171         int have_floating_point = 0;
00172 
00173         do {
00174           if(isdigit(c))
00175             have_digits = true;
00176           else if(c == '.')
00177             ++have_floating_point;
00178           else if(isalnum(c) || c == '_')
00179             have_nondigits = true;
00180 
00181           addChar();
00182         } while(!isspace(c) && !strchr(delims, c));
00183 
00184         token_string[token_length] = 0;
00185 
00186         // no nextChar
00187 
00188         if(have_nondigits || !have_digits || have_floating_point > 1)
00189           return TOKEN_SYMBOL;
00190         else if(have_floating_point == 1)
00191           return TOKEN_REAL;
00192         else
00193           return TOKEN_INTEGER;
00194       } else {
00195         do {
00196           addChar();
00197         } while(!isspace(c) && !strchr(delims, c));
00198         token_string[token_length] = 0;
00199 
00200         // no nextChar
00201 
00202         return TOKEN_SYMBOL;
00203       }
00204   }
00205 }

const char* lisp::Lexer::getString (  )  const [inline]

Definition at line 43 of file lexer.hpp.

References token_string.

Referenced by lisp::Parser::read().

00044   { return token_string; }

int lisp::Lexer::getLineNumber (  )  const [inline]

Definition at line 45 of file lexer.hpp.

References linenumber.

Referenced by lisp::Parser::parse_error().

00046   { return linenumber; }

void lisp::Lexer::nextChar (  )  [inline, private]

Definition at line 46 of file lexer.cpp.

References bufend, buffer, BUFFER_SIZE, bufpos, c, eof, linenumber, and stream.

Referenced by addChar(), getNextToken(), and Lexer().

00047 {
00048   if(bufpos >= bufend) {
00049     if(eof) {
00050       c = EOF;
00051       return;
00052     }
00053     stream.read(buffer, BUFFER_SIZE);
00054     size_t bytes_read = stream.gcount();
00055 
00056     bufpos = buffer;
00057     bufend = buffer + bytes_read;
00058 
00059     // the following is a hack that appends an additional ' ' at the end of
00060     // the file to avoid problems when parsing symbols/elements and a sudden
00061     // EOF. This is faster than relying on unget and IMO also nicer.
00062     if(bytes_read == 0 || stream.eof()) {
00063       eof = true;
00064       *bufend = ' ';
00065       ++bufend;
00066     }
00067   }
00068   c = *bufpos++;
00069   if(c == '\n')
00070     ++linenumber;
00071 }

void lisp::Lexer::addChar (  )  [inline, private]

Definition at line 74 of file lexer.cpp.

References c, MAX_TOKEN_LENGTH, nextChar(), token_length, and token_string.

Referenced by getNextToken().

00075 {
00076   if(token_length < MAX_TOKEN_LENGTH)
00077     token_string[token_length++] = c;
00078   nextChar();
00079 }

Lexer& lisp::Lexer::operator= ( const Lexer &  )  [private]


Member Data Documentation

std::istream& lisp::Lexer::stream [private]

Definition at line 58 of file lexer.hpp.

Referenced by nextChar().

bool lisp::Lexer::eof [private]

Definition at line 59 of file lexer.hpp.

Referenced by nextChar().

int lisp::Lexer::linenumber [private]

Definition at line 60 of file lexer.hpp.

Referenced by getLineNumber(), getNextToken(), and nextChar().

char lisp::Lexer::buffer[BUFFER_SIZE+1] [private]

Definition at line 61 of file lexer.hpp.

Referenced by nextChar().

char* lisp::Lexer::bufend [private]

Definition at line 62 of file lexer.hpp.

Referenced by Lexer(), and nextChar().

char* lisp::Lexer::bufpos [private]

Definition at line 63 of file lexer.hpp.

Referenced by Lexer(), and nextChar().

int lisp::Lexer::c [private]

Definition at line 64 of file lexer.hpp.

Referenced by addChar(), getNextToken(), and nextChar().

char lisp::Lexer::token_string[MAX_TOKEN_LENGTH+1] [private]

Definition at line 65 of file lexer.hpp.

Referenced by addChar(), getNextToken(), and getString().

int lisp::Lexer::token_length [private]

Definition at line 66 of file lexer.hpp.

Referenced by addChar(), and getNextToken().


The documentation for this class was generated from the following files:

  * lexer.hpp
  * lexer.cpp

Generated on Mon Jun 9 03:38:38 2014 for SuperTux by doxygen 1.5.1