#include <lexer.hpp>
Public Types | |
enum | TokenType { TOKEN_EOF, TOKEN_OPEN_PAREN, TOKEN_CLOSE_PAREN, TOKEN_SYMBOL, TOKEN_STRING, TOKEN_INTEGER, TOKEN_REAL, TOKEN_TRUE, TOKEN_FALSE } |
Public Member Functions | |
Lexer (std::istream &stream) | |
~Lexer () | |
TokenType | getNextToken () |
const char * | getString () const |
int | getLineNumber () const |
Private Types | |
enum | { MAX_TOKEN_LENGTH = 16384, BUFFER_SIZE = 1024 } |
Private Member Functions | |
void | nextChar () |
void | addChar () |
Lexer (const Lexer &) | |
Lexer & | operator= (const Lexer &) |
Private Attributes | |
std::istream & | stream |
bool | eof |
int | linenumber |
char | buffer [BUFFER_SIZE+1] |
char * | bufend |
char * | bufpos |
int | c |
char | token_string [MAX_TOKEN_LENGTH+1] |
int | token_length |
Definition at line 24 of file lexer.hpp.
TOKEN_EOF | |
TOKEN_OPEN_PAREN | |
TOKEN_CLOSE_PAREN | |
TOKEN_SYMBOL | |
TOKEN_STRING | |
TOKEN_INTEGER | |
TOKEN_REAL | |
TOKEN_TRUE | |
TOKEN_FALSE |
Definition at line 27 of file lexer.hpp.
/// Kinds of token that Lexer::getNextToken() can report.
enum TokenType
{
  /// End of the input stream was reached.
  TOKEN_EOF,
  /// A '(' character.
  TOKEN_OPEN_PAREN,
  /// A ')' character.
  TOKEN_CLOSE_PAREN,
  /// Any bare word that is not classified as a number.
  TOKEN_SYMBOL,
  /// A double-quoted string literal.
  TOKEN_STRING,
  /// A numeric token without a decimal point.
  TOKEN_INTEGER,
  /// A numeric token containing exactly one decimal point.
  TOKEN_REAL,
  /// The constant "#t".
  TOKEN_TRUE,
  /// The constant "#f".
  TOKEN_FALSE
};
anonymous enum [private] |
Definition at line 49 of file lexer.hpp.
/// Fixed capacities used by the lexer's internal character arrays.
enum
{
  /// Maximum number of characters stored for one token; the token_string
  /// array is declared one element larger than this.
  MAX_TOKEN_LENGTH = 16384,
  /// Number of bytes requested from the stream per refill; the buffer
  /// array is declared one element larger than this.
  BUFFER_SIZE = 1024
};
lisp::Lexer::Lexer | ( | std::istream & | stream | ) |
Definition at line 26 of file lexer.cpp.
References bufend, bufpos, and nextChar().
00026 : 00027 stream(newstream), 00028 eof(false), 00029 linenumber(0), 00030 bufend(), 00031 bufpos(), 00032 c(), 00033 token_length() 00034 { 00035 // trigger a refill of the buffer 00036 bufpos = NULL; 00037 bufend = NULL; 00038 nextChar(); 00039 }
lisp::Lexer::Lexer | ( | const Lexer & | ) | [private] |
Lexer::TokenType lisp::Lexer::getNextToken | ( | ) |
Definition at line 82 of file lexer.cpp.
References addChar(), c, linenumber, MAX_TOKEN_LENGTH, nextChar(), TOKEN_CLOSE_PAREN, TOKEN_EOF, TOKEN_FALSE, TOKEN_INTEGER, token_length, TOKEN_OPEN_PAREN, TOKEN_REAL, TOKEN_STRING, token_string, TOKEN_SYMBOL, and TOKEN_TRUE.
Referenced by lisp::Parser::parse(), and lisp::Parser::read().
00083 { 00084 static const char* delims = "\"();"; 00085 00086 while(isspace(c)) { 00087 nextChar(); 00088 } 00089 00090 token_length = 0; 00091 00092 switch(c) { 00093 case ';': // comment 00094 while(c != '\n') { 00095 nextChar(); 00096 } 00097 return getNextToken(); // and again 00098 case '(': 00099 nextChar(); 00100 return TOKEN_OPEN_PAREN; 00101 case ')': 00102 nextChar(); 00103 return TOKEN_CLOSE_PAREN; 00104 case '"': { // string 00105 int startline = linenumber; 00106 while(1) { 00107 nextChar(); 00108 switch(c) { 00109 case '"': 00110 nextChar(); 00111 goto string_finished; 00112 case '\r': 00113 continue; 00114 case '\n': 00115 break; 00116 case '\\': 00117 nextChar(); 00118 switch(c) { 00119 case 'n': 00120 c = '\n'; 00121 break; 00122 case 't': 00123 c = '\t'; 00124 break; 00125 } 00126 break; 00127 case EOF: { 00128 std::stringstream msg; 00129 msg << "Parse error in line " << startline << ": " 00130 << "EOF while parsing string."; 00131 throw std::runtime_error(msg.str()); 00132 } 00133 default: 00134 break; 00135 } 00136 if(token_length < MAX_TOKEN_LENGTH) 00137 token_string[token_length++] = c; 00138 } 00139 string_finished: 00140 token_string[token_length] = 0; 00141 return TOKEN_STRING; 00142 } 00143 case '#': // constant 00144 nextChar(); 00145 00146 while(isalnum(c) || c == '_') { 00147 addChar(); 00148 } 00149 token_string[token_length] = 0; 00150 00151 if(strcmp(token_string, "t") == 0) 00152 return TOKEN_TRUE; 00153 if(strcmp(token_string, "f") == 0) 00154 return TOKEN_FALSE; 00155 00156 // we only handle #t and #f constants at the moment... 
00157 { 00158 std::stringstream msg; 00159 msg << "Parse Error in line " << linenumber << ": " 00160 << "Unknown constant '" << token_string << "'."; 00161 throw std::runtime_error(msg.str()); 00162 } 00163 00164 case EOF: 00165 return TOKEN_EOF; 00166 00167 default: 00168 if(isdigit(c) || c == '-') { 00169 bool have_nondigits = false; 00170 bool have_digits = false; 00171 int have_floating_point = 0; 00172 00173 do { 00174 if(isdigit(c)) 00175 have_digits = true; 00176 else if(c == '.') 00177 ++have_floating_point; 00178 else if(isalnum(c) || c == '_') 00179 have_nondigits = true; 00180 00181 addChar(); 00182 } while(!isspace(c) && !strchr(delims, c)); 00183 00184 token_string[token_length] = 0; 00185 00186 // no nextChar 00187 00188 if(have_nondigits || !have_digits || have_floating_point > 1) 00189 return TOKEN_SYMBOL; 00190 else if(have_floating_point == 1) 00191 return TOKEN_REAL; 00192 else 00193 return TOKEN_INTEGER; 00194 } else { 00195 do { 00196 addChar(); 00197 } while(!isspace(c) && !strchr(delims, c)); 00198 token_string[token_length] = 0; 00199 00200 // no nextChar 00201 00202 return TOKEN_SYMBOL; 00203 } 00204 } 00205 }
const char* lisp::Lexer::getString | ( | ) | const [inline] |
Definition at line 43 of file lexer.hpp.
References token_string.
Referenced by lisp::Parser::read().
00044 { return token_string; }
int lisp::Lexer::getLineNumber | ( | ) | const [inline] |
Definition at line 45 of file lexer.hpp.
References linenumber.
Referenced by lisp::Parser::parse_error().
00046 { return linenumber; }
void lisp::Lexer::nextChar | ( | ) | [inline, private] |
Definition at line 46 of file lexer.cpp.
References bufend, buffer, BUFFER_SIZE, bufpos, c, eof, linenumber, and stream.
Referenced by addChar(), getNextToken(), and Lexer().
00047 { 00048 if(bufpos >= bufend) { 00049 if(eof) { 00050 c = EOF; 00051 return; 00052 } 00053 stream.read(buffer, BUFFER_SIZE); 00054 size_t bytes_read = stream.gcount(); 00055 00056 bufpos = buffer; 00057 bufend = buffer + bytes_read; 00058 00059 // the following is a hack that appends an additional ' ' at the end of 00060 // the file to avoid problems when parsing symbols/elements and a sudden 00061 // EOF. This is faster than relying on unget and IMO also nicer. 00062 if(bytes_read == 0 || stream.eof()) { 00063 eof = true; 00064 *bufend = ' '; 00065 ++bufend; 00066 } 00067 } 00068 c = *bufpos++; 00069 if(c == '\n') 00070 ++linenumber; 00071 }
void lisp::Lexer::addChar | ( | ) | [inline, private] |
Definition at line 74 of file lexer.cpp.
References c, MAX_TOKEN_LENGTH, nextChar(), token_length, and token_string.
Referenced by getNextToken().
00075 { 00076 if(token_length < MAX_TOKEN_LENGTH) 00077 token_string[token_length++] = c; 00078 nextChar(); 00079 }
std::istream& lisp::Lexer::stream [private] |
bool lisp::Lexer::eof [private] |
int lisp::Lexer::linenumber [private] |
Definition at line 60 of file lexer.hpp.
Referenced by getLineNumber(), getNextToken(), and nextChar().
char lisp::Lexer::buffer[BUFFER_SIZE+1] [private] |
char* lisp::Lexer::bufend [private] |
char* lisp::Lexer::bufpos [private] |
int lisp::Lexer::c [private] |
char lisp::Lexer::token_string[MAX_TOKEN_LENGTH+1] [private] |
int lisp::Lexer::token_length [private] |