TinyGetText::POFileReader Class Reference

List of all members.

Public Member Functions

 POFileReader (std::istream &in_, Dictionary &dict_)
void parse_header (const std::string &header)
void nextChar ()
void skipSpace ()
bool expectToken (std::string type, Token wanted)
bool expectContent (std::string type, std::string wanted)
void tokenize_po ()
Token nextToken ()

Private Types

enum  Token { TOKEN_KEYWORD, TOKEN_CONTENT, TOKEN_EOF }

Private Attributes

std::istream & in
Dictionary & dict
std::string from_charset
std::string to_charset
int line_num
int c
Token token
std::string tokenContent

Detailed Description

Definition at line 541 of file tinygettext.cpp.


Member Enumeration Documentation

enum TinyGetText::POFileReader::Token [private]

Enumerator:
TOKEN_KEYWORD 
TOKEN_CONTENT 
TOKEN_EOF 

Definition at line 552 of file tinygettext.cpp.

// Kinds of token returned by nextToken(). The text belonging to the token, if
// any, is left in tokenContent (it is cleared at the start of every nextToken()).
00552              {
00553       TOKEN_KEYWORD, //msgstr, msgid, etc.
00554       TOKEN_CONTENT, //string literals, concatenated ("" "foo\n" "bar\n" -> "foo\nbar\n")
00555       TOKEN_EOF      //ran out of tokens
00556   };


Constructor & Destructor Documentation

TinyGetText::POFileReader::POFileReader ( std::istream &  in_,
Dictionary &  dict_ 
) [inline]

Definition at line 561 of file tinygettext.cpp.

References c, line_num, nextChar(), and tokenize_po().

// Binds the reader to the input stream and the destination dictionary, then
// parses the whole stream right away: tokenize_po() is called from here, so
// constructing the object is what fills dict_.
00562     : in(in_), dict(dict_)
00563   {
          // nextChar() increments this for every '\n' it reads, so it holds the
          // number of newlines consumed so far.
00564     line_num = 0;
          // Load the first byte of the stream into c (one character of lookahead).
00565     nextChar();
          // 0xef is the first byte of the UTF-8 byte-order mark (EF BB BF). The
          // three calls step over BB and BF and load the first byte after the mark.
          // NOTE(review): only the first byte is tested; the following two bytes
          // are discarded without being checked.
00566     if(c == 0xef) { // skip UTF-8 intro that some text editors produce
00567         nextChar();
00568         nextChar();
00569         nextChar();
00570     }
00571     tokenize_po();
00572   }


Member Function Documentation

void TinyGetText::POFileReader::parse_header ( const std::string &  header  )  [inline]

Definition at line 574 of file tinygettext.cpp.

References dict, from_charset, TinyGetText::Dictionary::get_charset(), TinyGetText::has_prefix(), log_warning, TinyGetText::Dictionary::set_charset(), and to_charset.

Referenced by tokenize_po().

// Reads the charset declaration out of the .po header text and sets
// from_charset (charset of the file) and to_charset (charset to convert to).
// `header` is the msgstr of the entry whose msgid is empty; tokenize_po()
// passes it in.
00575   {
00576     // Separate the header in lines
00577     typedef std::vector<std::string> Lines;
00578     Lines lines;
00579 
          // Each '\n' closes one line; the '\n' itself is not stored.
          // NOTE(review): text after the last '\n' is never pushed, so a final
          // header line without a trailing newline is ignored.
00580     std::string::size_type start = 0;
00581     for(std::string::size_type i = 0; i < header.length(); ++i)
00582       {
00583         if (header[i] == '\n')
00584           {
00585             lines.push_back(header.substr(start, i - start));
00586             start = i+1;
00587           }
00588       }
00589 
          // Everything after the prefix, up to the end of that line, becomes
          // from_charset. There is no break, so the last matching line wins.
00590     for(Lines::iterator i = lines.begin(); i != lines.end(); ++i)
00591       {
00592         if (has_prefix(*i, "Content-Type: text/plain; charset=")) {
00593           from_charset = i->substr(strlen("Content-Type: text/plain; charset="));
00594         }
00595       }
00596 
          // "CHARSET" is treated the same as a missing declaration.
00597     if (from_charset.empty() || from_charset == "CHARSET")
00598       {
00599         log_warning << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl;
00600         from_charset = "ISO-8859-1";
00601       }
00602 
          // The dictionary may already name the charset it wants.
00603     to_charset = dict.get_charset();
00604     if (to_charset.empty())
00605       { // No charset requested from the dict, use utf-8
            // NOTE(review): to_charset is set to "utf-8" but the dictionary is
            // told from_charset; confirm this is intended rather than
            // set_charset(to_charset).
00606         to_charset = "utf-8";
00607         dict.set_charset(from_charset);
00608       }
00609   }

void TinyGetText::POFileReader::nextChar (  )  [inline]

Definition at line 611 of file tinygettext.cpp.

References c, in, and line_num.

Referenced by nextToken(), POFileReader(), and skipSpace().

// Advances the one-character lookahead: reads the next character from the
// stream into c and counts newlines in line_num.
00612   {
          // c is an int so that the end-of-stream value can be told apart from
          // ordinary characters; callers compare it against EOF.
00613     c = in.get();
00614     if (c == '\n')
00615       line_num++;
00616   }

void TinyGetText::POFileReader::skipSpace (  )  [inline]

Definition at line 618 of file tinygettext.cpp.

References c, and nextChar().

Referenced by nextToken().

// Skips whitespace and '#' comments so that c ends up on the first character
// of the next token, or on EOF.
00619   {
00620     if(c == EOF)
00621       return;
00622 
          // The cast to unsigned char keeps the argument of isspace() in range.
00623     while(c == '#' || isspace(static_cast<unsigned char>(c))) {
00624       if(c == '#') {
              // A comment runs to the end of the line; stop on the '\n' (or EOF).
00625         while(c != '\n' && c != EOF) nextChar();
00626       }
            // Consumes the current whitespace character, or the '\n' that ended
            // a comment.
00627       nextChar();
00628     }
00629   }

bool TinyGetText::POFileReader::expectToken ( std::string  type,
Token  wanted 
) [inline]

Definition at line 631 of file tinygettext.cpp.

References line_num, log_warning, token, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.

Referenced by tokenize_po().

// Checks that the current token is of kind `wanted`.
// Returns true on a match. Otherwise logs a warning of the form
// "Expected <type>, got <what was found> at line <line_num>" and returns false.
// `type` is only used as the human-readable description in that message.
00631                                                         {
00632      if(token != wanted) {
00633         log_warning << "Expected " << type << ", got ";
00634         if(token == TOKEN_EOF)
00635           log_warning << "EOF";
00636         else if(token == TOKEN_KEYWORD)
00637           log_warning << "keyword '" << tokenContent << "'";
00638         else
00639           log_warning << "string \"" << tokenContent << '"';
00640 
              // line_num starts at 0 and counts the '\n' characters read so far.
00641         log_warning << " at line " << line_num << std::endl;
00642         return false;
00643      }
00644      return true;
00645   }

bool TinyGetText::POFileReader::expectContent ( std::string  type,
std::string  wanted 
) [inline]

Definition at line 647 of file tinygettext.cpp.

References line_num, log_warning, token, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.

Referenced by tokenize_po().

// Checks that the text of the current token equals `wanted`.
// Returns true on a match. Otherwise logs the same kind of warning as
// expectToken() and returns false. `type` is only used as the human-readable
// description in that message.
// Only tokenContent is compared; the token kind is not checked here.
00647                                                               {
00648      if(tokenContent != wanted) {
00649         log_warning << "Expected " << type << ", got ";
00650         if(token == TOKEN_EOF)
00651           log_warning << "EOF";
00652         else if(token == TOKEN_KEYWORD)
00653           log_warning << "keyword '" << tokenContent << "'";
00654         else
00655           log_warning << "string \"" << tokenContent << '"';
00656 
              // line_num starts at 0 and counts the '\n' characters read so far.
00657         log_warning << " at line " << line_num << std::endl;
00658         return false;
00659      }
00660      return true;
00661   }

void TinyGetText::POFileReader::tokenize_po (  )  [inline]

Definition at line 663 of file tinygettext.cpp.

References TinyGetText::Dictionary::add_translation(), TinyGetText::convert(), dict, expectContent(), expectToken(), from_charset, TinyGetText::has_prefix(), log_warning, nextToken(), parse_header(), to_charset, token, TOKEN_CONTENT, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.

Referenced by POFileReader().

// Parses the whole token stream and stores every entry in dict.
// Each pass of the outer loop handles one entry:
//   msgid "..." msgstr "..."                                   (ordinary)
//   msgid "..." msgid_plural "..." msgstr[0] "..." msgstr[1] "..." ...  (plural)
// A `break` out of the outer loop abandons the rest of the input; the
// expect*() helpers have already logged the reason.
00664     {
00665       token = nextToken();
00666       while(token != TOKEN_EOF)
00667         {
00668           if(!expectToken("'msgid' keyword", TOKEN_KEYWORD) || !expectContent("'msgid' keyword", "msgid")) break;
00669 
00670           token = nextToken();
00671           if(!expectToken("name after msgid", TOKEN_CONTENT)) break;
00672           std::string current_msgid = tokenContent;
00673 
00674           token = nextToken();
00675           if(!expectToken("msgstr or msgid_plural", TOKEN_KEYWORD)) break;
00676           if(tokenContent == "msgid_plural")
00677             {
00678               //Plural form
00679               token = nextToken();
00680               if(!expectToken("msgid_plural content", TOKEN_CONTENT)) break;
00681               std::string current_msgid_plural = tokenContent;
00682 
                    // Collect msgstr[N] "..." pairs, keyed by N. The loop stops
                    // on the first token that is not a msgstr[...] keyword, and
                    // that token is left in `token` for the outer loop.
00683               std::map<int, std::string> msgstr_plural;
00684               while((token = nextToken()) == TOKEN_KEYWORD && has_prefix(tokenContent, "msgstr["))
00685                 {
                        // NOTE(review): if sscanf fails, only a warning is
                        // logged; num stays uninitialized and is still used as
                        // the map key below.
00686                   int num;
00687                   if (sscanf(tokenContent.c_str(), "msgstr[%d]", &num) != 1)
00688                     {
00689                       log_warning << "Error: Couldn't parse: " << tokenContent << std::endl;
00690                     }
00691 
                        // NOTE(review): this break leaves only the inner loop;
                        // add_translation() below still runs and the outer loop
                        // carries on with the offending token.
00692                   token = nextToken();
00693                   if(!expectToken("msgstr[x] content", TOKEN_CONTENT)) break;
00694                   msgstr_plural[num] = convert(tokenContent, from_charset, to_charset);
00695                 }
                    // The msgids are passed as read; only the msgstr texts go
                    // through convert().
00696               dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural);
00697               // No nextToken()
00698             }
00699           else
00700             {
00701               // "Ordinary" translation
00702               if(!expectContent("'msgstr' keyword", "msgstr")) break;
00703 
00704               token = nextToken();
00705               if(!expectToken("translation in msgstr", TOKEN_CONTENT)) break;
00706 
00707               if (current_msgid == "")
00708                 { // .po Header is hidden in the msgid with the empty string
00709                   parse_header(tokenContent);
00710                 }
00711               else
00712                 {
                        // NOTE(review): from_charset and to_charset are only
                        // assigned in parse_header(), so an entry that comes
                        // before the header entry is converted with empty
                        // charset names; confirm convert() copes with that.
00713                   dict.add_translation(current_msgid, convert(tokenContent, from_charset, to_charset));
00714                 }
                    // Fetch the first token of the next entry for the loop test.
00715               token = nextToken();
00716             }
00717         }
00718     }

Token TinyGetText::POFileReader::nextToken (  )  [inline]

Definition at line 720 of file tinygettext.cpp.

References c, log_warning, nextChar(), skipSpace(), TOKEN_CONTENT, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.

Referenced by tokenize_po().

// Reads the next token from the stream.
// Returns its kind and leaves its text in tokenContent (empty for TOKEN_EOF).
// Leading whitespace and '#' comments are skipped first.
00721   {
00722     //Clear token contents
00723     tokenContent = "";
00724 
00725     skipSpace();
00726 
00727     if(c == EOF)
00728       return TOKEN_EOF;
00729     else if(c != '"')
00730       {
00731         // Read a keyword
              // A keyword is everything up to the next whitespace character or EOF.
              // NOTE(review): a '"' that directly follows the keyword, with no
              // space in between, is swallowed into the keyword text.
00732         do {
00733           tokenContent += c;
00734           nextChar();
00735         } while(c != EOF && !isspace(static_cast<unsigned char>(c)));
00736         return TOKEN_KEYWORD;
00737       }
00738     else
00739       {
              // Each pass of this do/while reads one quoted literal and appends
              // it to tokenContent, so adjacent literals are joined into one token.
00740         do {
                // Step over the opening quote.
00741           nextChar();
00742           // Read content
00743           while(c != EOF && c != '"') {
00744             if (c == '\\') {
                    // Handled escapes: \n \t \r \" and \\. Anything else is
                    // logged and replaced by a space.
00745               nextChar();
00746               if (c == 'n') c = '\n';
00747               else if (c == 't') c = '\t';
00748               else if (c == 'r') c = '\r';
00749               else if (c == '"') c = '"';
00750               else if (c == '\\') c = '\\';
00751               else
00752                 {
00753                   log_warning << "Unhandled escape character: " << char(c) << std::endl;
00754                   c = ' ';
00755                 }
00756             }
00757             tokenContent += c;
00758             nextChar();
00759           }
                // EOF inside a literal: warn, but still hand back what was read.
00760           if(c == EOF) {
00761             log_warning << "Unclosed string literal: " << tokenContent << std::endl;
00762             return TOKEN_CONTENT;
00763           }
00764 
00765           // Read more strings?
                // Step over the closing quote, then skip whitespace and
                // comments; another '"' means the literal continues.
00766           nextChar();
00767           skipSpace();
00768         } while(c == '"');
00769         return TOKEN_CONTENT;
00770       }
00771   }


Member Data Documentation

std::istream& TinyGetText::POFileReader::in [private]

Definition at line 544 of file tinygettext.cpp.

Referenced by nextChar().

Dictionary& TinyGetText::POFileReader::dict [private]

Definition at line 545 of file tinygettext.cpp.

Referenced by parse_header(), and tokenize_po().

std::string TinyGetText::POFileReader::from_charset [private]

Definition at line 547 of file tinygettext.cpp.

Referenced by parse_header(), and tokenize_po().

std::string TinyGetText::POFileReader::to_charset [private]

Definition at line 548 of file tinygettext.cpp.

Referenced by parse_header(), and tokenize_po().

int TinyGetText::POFileReader::line_num [private]

Definition at line 550 of file tinygettext.cpp.

Referenced by expectContent(), expectToken(), nextChar(), and POFileReader().

int TinyGetText::POFileReader::c [private]

Definition at line 551 of file tinygettext.cpp.

Referenced by nextChar(), nextToken(), POFileReader(), and skipSpace().

Token TinyGetText::POFileReader::token [private]

Definition at line 557 of file tinygettext.cpp.

Referenced by expectContent(), expectToken(), and tokenize_po().

std::string TinyGetText::POFileReader::tokenContent [private]

Definition at line 558 of file tinygettext.cpp.

Referenced by expectContent(), expectToken(), nextToken(), and tokenize_po().


The documentation for this class was generated from the following file:
  * tinygettext.cpp
Generated on Mon Apr 21 03:38:35 2014 for SuperTux by  doxygen 1.5.1