SuperTux: src/tinygettext/tinygettext.cpp Source File

00001 //  $Id: tinygettext.cpp 168 2009-11-18 17:55:48Z grumbel $
00002 //
00003 //  TinyGetText
00004 //  Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
00005 //
00006 //  This program is free software; you can redistribute it and/or
00007 //  modify it under the terms of the GNU General Public License
00008 //  as published by the Free Software Foundation; either version 2
00009 //  of the License, or (at your option) any later version.
00010 //
00011 //  This program is distributed in the hope that it will be useful,
00012 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 //  GNU General Public License for more details.
00015 //
00016 //  You should have received a copy of the GNU General Public License
00017 //  along with this program; if not, write to the Free Software
00018 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00019 
00020 #include <config.h>
00021 
00022 #include <sys/types.h>
00023 #include <fstream>
00024 #include <iostream>
00025 #include <algorithm>
00026 #include <ctype.h>
00027 #include <errno.h>
00028 #include <string.h>
00029 
00030 #include <SDL.h>
00031 
00032 #ifndef GP2X
00033 #include <SDL_stdinc.h>
00034 #endif
00035 
00036 #include "tinygettext.hpp"
00037 #include "log.hpp"
00038 #include "physfs/physfs_stream.hpp"
00039 #include "findlocale.hpp"
00040 
00041 //#define TRANSLATION_DEBUG
00042 
00043 namespace TinyGetText {
00044 
00046 std::string convert(const std::string& text,
00047                     const std::string& from_charset,
00048                     const std::string& to_charset)
00049 {
00050 #ifndef GP2X
00051   if (from_charset == to_charset)
00052     return text;
00053 
00054   char *in = new char[text.length() + 1];
00055   strcpy(in, text.c_str());
00056   char *out = SDL_iconv_string(to_charset.c_str(), from_charset.c_str(), in, text.length() + 1);
00057   delete[] in; 
00058   if(out == 0)
00059   {
00060     log_warning << "Error: conversion from " << from_charset << " to " << to_charset << " failed" << std::endl;
00061     return "";
00062   }
00063   std::string ret(out);
00064   SDL_free(out);
00065   return ret;
00066 #else
00067   log_warning << "FIXME: Char conversion not supported on GP2X!" << std::endl;
00068   return "";
00069 #endif
00070 #if 0
00071   iconv_t cd = SDL_iconv_open(to_charset.c_str(), from_charset.c_str());
00072 
00073   size_t in_len = text.length();
00074   size_t out_len = text.length()*3; // FIXME: cross fingers that this is enough
00075 
00076   char*  out_orig = new char[out_len];
00077   char*  in_orig  = new char[in_len+1];
00078   strcpy(in_orig, text.c_str());
00079 
00080   char* out = out_orig;
00081   ICONV_CONST char* in  = in_orig;
00082   size_t out_len_temp = out_len; // iconv is counting down the bytes it has
00083                                  // written from this...
00084 
00085   size_t retval = SDL_iconv(cd, &in, &in_len, &out, &out_len_temp);
00086   out_len -= out_len_temp; // see above
00087   if (retval == (size_t) -1)
00088     {
00089       log_warning << strerror(errno) << std::endl;
00090       log_warning << "Error: conversion from " << from_charset << " to " << to_charset << " went wrong: " << retval << std::endl;
00091       return "";
00092     }
00093   SDL_iconv_close(cd);
00094 
00095   std::string ret(out_orig, out_len);
00096   delete[] out_orig;
00097   delete[] in_orig;
00098   return ret;
00099 #endif
00100 }
00101 
/** Returns true when @p lhs ends with @p rhs; an empty @p rhs always matches. */
bool has_suffix(const std::string& lhs, const std::string rhs)
{
  const std::string::size_type tail_len = rhs.length();
  return lhs.length() >= tail_len
      && lhs.compare(lhs.length() - tail_len, tail_len, rhs) == 0;
}
00109 
/** Returns true when @p lhs starts with @p rhs; an empty @p rhs always matches. */
bool has_prefix(const std::string& lhs, const std::string rhs)
{
  const std::string::size_type head_len = rhs.length();
  return lhs.length() >= head_len
      && lhs.compare(0, head_len, rhs) == 0;
}
00117 
// Plural-form selectors. Each one maps a count n to the index of the
// msgstr variant to use, following the gettext "plural=" expression that
// is quoted next to the language definitions using it.

/** Single form: always index 0. */
int plural1(int /*n*/)
{
  return 0;
}

/** Two forms: index 0 for exactly one, index 1 otherwise. */
int plural2_1(int n)
{
  return (n != 1) ? 1 : 0;
}

/** Two forms: index 1 only for counts greater than one. */
int plural2_2(int n)
{
  return (n > 1) ? 1 : 0;
}

/** Latvian rule. */
int plural3_lv(int n)
{
  if (n % 10 == 1 && n % 100 != 11)
    return 0;
  if (n != 0)
    return 1;
  return 2;
}

/** Irish rule. */
int plural3_ga(int n)
{
  if (n == 1)
    return 0;
  if (n == 2)
    return 1;
  return 2;
}

/** Lithuanian rule. */
int plural3_lt(int n)
{
  const int last_digit = n % 10;
  const int last_two   = n % 100;

  if (last_digit == 1 && last_two != 11)
    return 0;
  if (last_digit >= 2 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Rule shared by Croatian, Czech, Russian and Ukrainian. */
int plural3_1(int n)
{
  const int last_digit = n % 10;
  const int last_two   = n % 100;

  if (last_digit == 1 && last_two != 11)
    return 0;
  if (last_digit >= 2 && last_digit <= 4 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Slovak rule. */
int plural3_sk(int n)
{
  if (n == 1)
    return 0;
  if (n >= 2 && n <= 4)
    return 1;
  return 2;
}

/** Polish rule. */
int plural3_pl(int n)
{
  if (n == 1)
    return 0;

  const int last_digit = n % 10;
  const int last_two   = n % 100;
  if (last_digit >= 2 && last_digit <= 4 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Slovenian rule; note that it yields four different indices (0..3). */
int plural3_sl(int n)
{
  switch (n % 100)
  {
    case 1:  return 0;
    case 2:  return 1;
    case 3:
    case 4:  return 2;
    default: return 3;
  }
}
00128 
00130 //*{
00131 LanguageDef lang_hu("hu", "Hungarian",         1, plural1); // "nplurals=1; plural=0;"
00132 LanguageDef lang_ja("ja", "Japanese",          1, plural1); // "nplurals=1; plural=0;"
00133 LanguageDef lang_ko("ko", "Korean",            1, plural1); // "nplurals=1; plural=0;"
00134 LanguageDef lang_tr("tr", "Turkish",           1, plural1); // "nplurals=1; plural=0;"
00135 LanguageDef lang_da("da", "Danish",            2, plural2_1); // "nplurals=2; plural=(n != 1);"
00136 LanguageDef lang_nl("nl", "Dutch",             2, plural2_1); // "nplurals=2; plural=(n != 1);"
00137 LanguageDef lang_en("en", "English",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
00138 LanguageDef lang_fo("fo", "Faroese",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
00139 LanguageDef lang_de("de", "German",            2, plural2_1); // "nplurals=2; plural=(n != 1);"
00140 LanguageDef lang_nb("nb", "Norwegian Bokmal",  2, plural2_1); // "nplurals=2; plural=(n != 1);"
00141 LanguageDef lang_no("no", "Norwegian",         2, plural2_1); // "nplurals=2; plural=(n != 1);"
00142 LanguageDef lang_nn("nn", "Norwegian Nynorsk", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
00143 LanguageDef lang_sv("sv", "Swedish",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
00144 LanguageDef lang_et("et", "Estonian",          2, plural2_1); // "nplurals=2; plural=(n != 1);"
00145 LanguageDef lang_fi("fi", "Finnish",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
00146 LanguageDef lang_el("el", "Greek",             2, plural2_1); // "nplurals=2; plural=(n != 1);"
00147 LanguageDef lang_he("he", "Hebrew",            2, plural2_1); // "nplurals=2; plural=(n != 1);"
00148 LanguageDef lang_it("it", "Italian",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
00149 LanguageDef lang_pt("pt", "Portuguese",        2, plural2_1); // "nplurals=2; plural=(n != 1);"
00150 LanguageDef lang_es("es", "Spanish",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
00151 LanguageDef lang_eo("eo", "Esperanto",         2, plural2_1); // "nplurals=2; plural=(n != 1);"
00152 LanguageDef lang_fr("fr", "French",            2, plural2_2); // "nplurals=2; plural=(n > 1);"
00153 LanguageDef lang_pt_BR("pt_BR", "Brazilian",   2, plural2_2); // "nplurals=2; plural=(n > 1);"
00154 LanguageDef lang_lv("lv", "Latvian",           3, plural3_lv); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2);"
00155 LanguageDef lang_ga("ga", "Irish",             3, plural3_ga); // "nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;"
00156 LanguageDef lang_lt("lt", "Lithuanian",        3, plural3_lt); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2);"
00157 LanguageDef lang_hr("hr", "Croatian",          3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
00158 LanguageDef lang_cs("cs", "Czech",             3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
00159 LanguageDef lang_ru("ru", "Russian",           3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
00160 LanguageDef lang_uk("uk", "Ukrainian",         3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
00161 LanguageDef lang_sk("sk", "Slovak",            3, plural3_sk); // "nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;"
00162 LanguageDef lang_pl("pl", "Polish",            3, plural3_pl); // "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);
00163 LanguageDef lang_sl("sl", "Slovenian",         3, plural3_sl); // "nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);"
00164 //*}
00165 
00166 LanguageDef&
00167 get_language_def(const std::string& name)
00168 {
00169   if (name == "hu") return lang_hu;
00170   else if (name == "ja") return lang_ja;
00171   else if (name == "ko") return lang_ko;
00172   else if (name == "tr") return lang_tr;
00173   else if (name == "da") return lang_da;
00174   else if (name == "nl") return lang_nl;
00175   else if (name == "en") return lang_en;
00176   else if (name == "fo") return lang_fo;
00177   else if (name == "de") return lang_de;
00178   else if (name == "nb") return lang_nb;
00179   else if (name == "no") return lang_no;
00180   else if (name == "nn") return lang_nn;
00181   else if (name == "sv") return lang_sv;
00182   else if (name == "et") return lang_et;
00183   else if (name == "fi") return lang_fi;
00184   else if (name == "el") return lang_el;
00185   else if (name == "he") return lang_he;
00186   else if (name == "it") return lang_it;
00187   else if (name == "pt") return lang_pt;
00188   else if (name == "es") return lang_es;
00189   else if (name == "eo") return lang_eo;
00190   else if (name == "fr") return lang_fr;
00191   else if (name == "pt_BR") return lang_pt_BR;
00192   else if (name == "lv") return lang_lv;
00193   else if (name == "ga") return lang_ga;
00194   else if (name == "lt") return lang_lt;
00195   else if (name == "hr") return lang_hr;
00196   else if (name == "cs") return lang_cs;
00197   else if (name == "ru") return lang_ru;
00198   else if (name == "uk") return lang_uk;
00199   else if (name == "sk") return lang_sk;
00200   else if (name == "pl") return lang_pl;
00201   else if (name == "sl") return lang_sl;
00202   else return lang_en;
00203 }
00204 
00205 DictionaryManager::DictionaryManager()
00206   : current_dict(&empty_dict)
00207 {
00208   parseLocaleAliases();
00209   // Environment variable SUPERTUX_LANG overrides language settings.
00210   const char* lang = getenv( "SUPERTUX_LANG" );
00211   if( lang ){
00212     set_language( lang );
00213     return;
00214   }
00215   // use findlocale to setup language
00216   FL_Locale *locale;
00217   FL_FindLocale( &locale, FL_MESSAGES );
00218   if(locale->lang) {
00219     if (locale->country) {
00220       set_language( std::string(locale->lang)+"_"+std::string(locale->country) );
00221     } else {
00222       set_language( std::string(locale->lang) );
00223     }
00224   }
00225   FL_FreeLocale( &locale );
00226 }
00227 
00228 void
00229 DictionaryManager::parseLocaleAliases()
00230 {
00231   // try to parse language alias list
00232   std::ifstream in("/usr/share/locale/locale.alias");
00233 
00234   char c = ' ';
00235   while(in.good() && !in.eof()) {
00236     while(isspace(static_cast<unsigned char>(c)) && !in.eof())
00237       in.get(c);
00238 
00239     if(c == '#') { // skip comments
00240       while(c != '\n' && !in.eof())
00241         in.get(c);
00242       continue;
00243     }
00244 
00245     std::string alias;
00246     while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
00247       alias += c;
00248       in.get(c);
00249     }
00250     while(isspace(static_cast<unsigned char>(c)) && !in.eof())
00251       in.get(c);
00252     std::string language;
00253     while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
00254       language += c;
00255       in.get(c);
00256     }
00257 
00258     if(in.eof())
00259       break;
00260     set_language_alias(alias, language);
00261   }
00262 }
00263 
00264 Dictionary&
00265 DictionaryManager::get_dictionary(const std::string& spec)
00266 {
00267 
00268   //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
00269 
00270   std::string lang = get_language_from_spec(spec);
00271 
00272   //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
00273 
00274   Dictionaries::iterator i = dictionaries.find(get_language_from_spec(lang));
00275   if (i != dictionaries.end())
00276     {
00277       return i->second;
00278     }
00279   else // Dictionary for languages lang isn't loaded, so we load it
00280     {
00281       //log_debug << "get_dictionary: " << lang << std::endl;
00282       Dictionary& dict = dictionaries[lang];
00283 
00284       dict.set_language(get_language_def(lang));
00285       if(charset != "")
00286         dict.set_charset(charset);
00287 
00288       for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
00289         {
00290           char** files = PHYSFS_enumerateFiles(p->c_str());
00291           if(!files)
00292             {
00293               log_warning << "Error: enumerateFiles() failed on " << *p << std::endl;
00294             }
00295           else
00296             {
00297               for(const char* const* filename = files;
00298                       *filename != 0; filename++) {
00299 
00300                 // check if filename matches requested language
00301                 std::string fname = std::string(*filename);
00302                 std::string load_from_file = "";
00303                 if(fname == lang + ".po") {
00304                   load_from_file = fname;
00305                 } else {
00306                   std::string::size_type s = lang.find("_");
00307                   if(s != std::string::npos) {
00308                     std::string lang_short = std::string(lang, 0, s);
00309                     if (fname == lang_short + ".po") {
00310                       load_from_file = lang_short;
00311                     }
00312                   }
00313                 }
00314 
00315                 // if it matched, load dictionary
00316                 if (load_from_file != "") {
00317                   //log_debug << "Loading dictionary for language \"" << lang << "\" from \"" << filename << "\"" << std::endl;
00318                   std::string pofile = *p + "/" + *filename;
00319                   try {
00320                       IFileStream in(pofile);
00321                       read_po_file(dict, in);
00322                   } catch(std::exception& e) {
00323                       log_warning << "Error: Failure file opening: " << pofile << std::endl;
00324                       log_warning << e.what() << "" << std::endl;
00325                   }
00326                 }
00327 
00328               }
00329               PHYSFS_freeList(files);
00330             }
00331         }
00332 
00333       return dict;
00334     }
00335 }
00336 
00337 std::set<std::string>
00338 DictionaryManager::get_languages()
00339 {
00340   std::set<std::string> languages;
00341 
00342   for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
00343     {
00344       char** files = PHYSFS_enumerateFiles(p->c_str());
00345       if (!files)
00346         {
00347           log_warning << "Error: opendir() failed on " << *p << std::endl;
00348         }
00349       else
00350         {
00351           for(const char* const* file = files; *file != 0; file++) {
00352               if(has_suffix(*file, ".po")) {
00353                   std::string filename = *file;
00354                   languages.insert(filename.substr(0, filename.length()-3));
00355               }
00356           }
00357           PHYSFS_freeList(files);
00358         }
00359     }
00360   return languages;
00361 }
00362 
00363 void
00364 DictionaryManager::set_language(const std::string& lang)
00365 {
00366   //log_debug << "set_language \"" << lang << "\"" << std::endl;
00367   language = get_language_from_spec(lang);
00368   //log_debug << "==> \"" << language << "\"" << std::endl;
00369   current_dict = & (get_dictionary(language));
00370 }
00371 
00372 const std::string&
00373 DictionaryManager::get_language() const
00374 {
00375   return language;
00376 }
00377 
00378 void
00379 DictionaryManager::set_charset(const std::string& charset)
00380 {
00381   dictionaries.clear(); // changing charset invalidates cache
00382   this->charset = charset;
00383   set_language(language);
00384 }
00385 
00386 void
00387 DictionaryManager::set_language_alias(const std::string& alias,
00388     const std::string& language)
00389 {
00390   language_aliases.insert(std::make_pair(alias, language));
00391 }
00392 
00393 std::string
00394 DictionaryManager::get_language_from_spec(const std::string& spec)
00395 {
00396   std::string lang = spec;
00397   Aliases::iterator i = language_aliases.find(lang);
00398   if(i != language_aliases.end()) {
00399     lang = i->second;
00400   }
00401 
00402   std::string::size_type s = lang.find(".");
00403   if(s != std::string::npos) {
00404     lang = std::string(lang, 0, s);
00405   }
00406 
00407   s = lang.find("_");
00408   if(s == std::string::npos) {
00409     std::string lang_big = lang;
00410     std::transform (lang_big.begin(), lang_big.end(), lang_big.begin(), toupper);
00411     lang += "_" + lang_big;
00412   }
00413 
00414   return lang;
00415 
00416 }
00417 
00418 void
00419 DictionaryManager::add_directory(const std::string& pathname)
00420 {
00421   dictionaries.clear(); // adding directories invalidates cache
00422   search_path.push_back(pathname);
00423   set_language(language);
00424 }
00425 
00426 //---------------------------------------------------------------------------
00427 
00428 Dictionary::Dictionary(const LanguageDef& language_, const std::string& charset_)
00429   : language(language_), charset(charset_)
00430 {
00431 }
00432 
00433 Dictionary::Dictionary()
00434   : language(lang_en)
00435 {
00436 }
00437 
00438 std::string
00439 Dictionary::get_charset() const
00440 {
00441   return charset;
00442 }
00443 
00444 void
00445 Dictionary::set_charset(const std::string& charset_)
00446 {
00447   charset = charset_;
00448 }
00449 
00450 void
00451 Dictionary::set_language(const LanguageDef& lang)
00452 {
00453   language = lang;
00454 }
00455 
00456 std::string
00457 Dictionary::translate(const std::string& msgid, const std::string& msgid2, int num)
00458 {
00459   PluralEntries::iterator i = plural_entries.find(msgid);
00460   std::map<int, std::string>& msgstrs = i->second;
00461 
00462   if (i != plural_entries.end() && !msgstrs.empty())
00463     {
00464       int g = language.plural(num);
00465       std::map<int, std::string>::iterator j = msgstrs.find(g);
00466       if (j != msgstrs.end())
00467         {
00468           return j->second;
00469         }
00470       else
00471         {
00472           // Return the first translation, in case we can't translate the specific number
00473           return msgstrs.begin()->second;
00474         }
00475     }
00476   else
00477     {
00478 #ifdef TRANSLATION_DEBUG
00479       log_warning << "Couldn't translate: " << msgid << std::endl;
00480       log_warning << "Candidates: " << std::endl;
00481       for (PluralEntries::iterator i = plural_entries.begin(); i != plural_entries.end(); ++i)
00482         log_debug << "'" << i->first << "'" << std::endl;
00483 #endif
00484 
00485       if (plural2_1(num)) // default to english rules
00486         return msgid2;
00487       else
00488         return msgid;
00489     }
00490 }
00491 
00492 const char*
00493 Dictionary::translate(const char* msgid)
00494 {
00495   Entries::iterator i = entries.find(msgid);
00496   if (i != entries.end() && !i->second.empty())
00497     {
00498       return i->second.c_str();
00499     }
00500   else
00501     {
00502 #ifdef TRANSLATION_DEBUG
00503       log_warning << "Couldn't translate: " << msgid << std::endl;
00504 #endif
00505       return msgid;
00506     }
00507 }
00508 
00509 std::string
00510 Dictionary::translate(const std::string& msgid)
00511 {
00512   Entries::iterator i = entries.find(msgid);
00513   if (i != entries.end() && !i->second.empty())
00514     {
00515       return i->second;
00516     }
00517   else
00518     {
00519 #ifdef TRANSLATION_DEBUG
00520       log_warning << "Couldn't translate: " << msgid << std::endl;
00521 #endif
00522       return msgid;
00523     }
00524 }
00525 
00526 void
00527 Dictionary::add_translation(const std::string& msgid, const std::string& ,
00528                             const std::map<int, std::string>& msgstrs)
00529 {
00530   // Do we need msgid2 for anything? its after all supplied to the
00531   // translate call, so we just throw it away
00532   plural_entries[msgid] = msgstrs;
00533 }
00534 
00535 void
00536 Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
00537 {
00538   entries[msgid] = msgstr;
00539 }
00540 
00541 class POFileReader
00542 {
00543 private:
00544   std::istream& in;
00545   Dictionary& dict;
00546 
00547   std::string from_charset;
00548   std::string to_charset;
00549 
00550   int line_num;
00551   int c; //TODO: char c? unsigned char c?
00552   enum Token {
00553       TOKEN_KEYWORD, //msgstr, msgid, etc.
00554       TOKEN_CONTENT, //string literals, concatenated ("" "foo\n" "bar\n" -> "foo\nbar\n")
00555       TOKEN_EOF      //ran out of tokens
00556   };
00557   Token token;
00558   std::string tokenContent; //current contents of the keyword or string literal(s)
00559 
00560 public:
00561   POFileReader(std::istream& in_, Dictionary& dict_)
00562     : in(in_), dict(dict_)
00563   {
00564     line_num = 0;
00565     nextChar();
00566     if(c == 0xef) { // skip UTF-8 intro that some text editors produce
00567         nextChar();
00568         nextChar();
00569         nextChar();
00570     }
00571     tokenize_po();
00572   }
00573 
00574   void parse_header(const std::string& header)
00575   {
00576     // Separate the header in lines
00577     typedef std::vector<std::string> Lines;
00578     Lines lines;
00579 
00580     std::string::size_type start = 0;
00581     for(std::string::size_type i = 0; i < header.length(); ++i)
00582       {
00583         if (header[i] == '\n')
00584           {
00585             lines.push_back(header.substr(start, i - start));
00586             start = i+1;
00587           }
00588       }
00589 
00590     for(Lines::iterator i = lines.begin(); i != lines.end(); ++i)
00591       {
00592         if (has_prefix(*i, "Content-Type: text/plain; charset=")) {
00593           from_charset = i->substr(strlen("Content-Type: text/plain; charset="));
00594         }
00595       }
00596 
00597     if (from_charset.empty() || from_charset == "CHARSET")
00598       {
00599         log_warning << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl;
00600         from_charset = "ISO-8859-1";
00601       }
00602 
00603     to_charset = dict.get_charset();
00604     if (to_charset.empty())
00605       { // No charset requested from the dict, use utf-8
00606         to_charset = "utf-8";
00607         dict.set_charset(from_charset);
00608       }
00609   }
00610 
00611   inline void nextChar()
00612   {
00613     c = in.get();
00614     if (c == '\n')
00615       line_num++;
00616   }
00617 
00618   inline void skipSpace()
00619   {
00620     if(c == EOF)
00621       return;
00622 
00623     while(c == '#' || isspace(static_cast<unsigned char>(c))) {
00624       if(c == '#') {
00625         while(c != '\n' && c != EOF) nextChar();
00626       }
00627       nextChar();
00628     }
00629   }
00630 
00631   inline bool expectToken(std::string type, Token wanted) {
00632      if(token != wanted) {
00633         log_warning << "Expected " << type << ", got ";
00634         if(token == TOKEN_EOF)
00635           log_warning << "EOF";
00636         else if(token == TOKEN_KEYWORD)
00637           log_warning << "keyword '" << tokenContent << "'";
00638         else
00639           log_warning << "string \"" << tokenContent << '"';
00640 
00641         log_warning << " at line " << line_num << std::endl;
00642         return false;
00643      }
00644      return true;
00645   }
00646 
00647   inline bool expectContent(std::string type, std::string wanted) {
00648      if(tokenContent != wanted) {
00649         log_warning << "Expected " << type << ", got ";
00650         if(token == TOKEN_EOF)
00651           log_warning << "EOF";
00652         else if(token == TOKEN_KEYWORD)
00653           log_warning << "keyword '" << tokenContent << "'";
00654         else
00655           log_warning << "string \"" << tokenContent << '"';
00656 
00657         log_warning << " at line " << line_num << std::endl;
00658         return false;
00659      }
00660      return true;
00661   }
00662 
00663   void tokenize_po()
00664     {
00665       token = nextToken();
00666       while(token != TOKEN_EOF)
00667         {
00668           if(!expectToken("'msgid' keyword", TOKEN_KEYWORD) || !expectContent("'msgid' keyword", "msgid")) break;
00669 
00670           token = nextToken();
00671           if(!expectToken("name after msgid", TOKEN_CONTENT)) break;
00672           std::string current_msgid = tokenContent;
00673 
00674           token = nextToken();
00675           if(!expectToken("msgstr or msgid_plural", TOKEN_KEYWORD)) break;
00676           if(tokenContent == "msgid_plural")
00677             {
00678               //Plural form
00679               token = nextToken();
00680               if(!expectToken("msgid_plural content", TOKEN_CONTENT)) break;
00681               std::string current_msgid_plural = tokenContent;
00682 
00683               std::map<int, std::string> msgstr_plural;
00684               while((token = nextToken()) == TOKEN_KEYWORD && has_prefix(tokenContent, "msgstr["))
00685                 {
00686                   int num;
00687                   if (sscanf(tokenContent.c_str(), "msgstr[%d]", &num) != 1)
00688                     {
00689                       log_warning << "Error: Couldn't parse: " << tokenContent << std::endl;
00690                     }
00691 
00692                   token = nextToken();
00693                   if(!expectToken("msgstr[x] content", TOKEN_CONTENT)) break;
00694                   msgstr_plural[num] = convert(tokenContent, from_charset, to_charset);
00695                 }
00696               dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural);
00697               // No nextToken()
00698             }
00699           else
00700             {
00701               // "Ordinary" translation
00702               if(!expectContent("'msgstr' keyword", "msgstr")) break;
00703 
00704               token = nextToken();
00705               if(!expectToken("translation in msgstr", TOKEN_CONTENT)) break;
00706 
00707               if (current_msgid == "")
00708                 { // .po Header is hidden in the msgid with the empty string
00709                   parse_header(tokenContent);
00710                 }
00711               else
00712                 {
00713                   dict.add_translation(current_msgid, convert(tokenContent, from_charset, to_charset));
00714                 }
00715               token = nextToken();
00716             }
00717         }
00718     }
00719 
00720   Token nextToken()
00721   {
00722     //Clear token contents
00723     tokenContent = "";
00724 
00725     skipSpace();
00726 
00727     if(c == EOF)
00728       return TOKEN_EOF;
00729     else if(c != '"')
00730       {
00731         // Read a keyword
00732         do {
00733           tokenContent += c;
00734           nextChar();
00735         } while(c != EOF && !isspace(static_cast<unsigned char>(c)));
00736         return TOKEN_KEYWORD;
00737       }
00738     else
00739       {
00740         do {
00741           nextChar();
00742           // Read content
00743           while(c != EOF && c != '"') {
00744             if (c == '\\') {
00745               nextChar();
00746               if (c == 'n') c = '\n';
00747               else if (c == 't') c = '\t';
00748               else if (c == 'r') c = '\r';
00749               else if (c == '"') c = '"';
00750               else if (c == '\\') c = '\\';
00751               else
00752                 {
00753                   log_warning << "Unhandled escape character: " << char(c) << std::endl;
00754                   c = ' ';
00755                 }
00756             }
00757             tokenContent += c;
00758             nextChar();
00759           }
00760           if(c == EOF) {
00761             log_warning << "Unclosed string literal: " << tokenContent << std::endl;
00762             return TOKEN_CONTENT;
00763           }
00764 
00765           // Read more strings?
00766           nextChar();
00767           skipSpace();
00768         } while(c == '"');
00769         return TOKEN_CONTENT;
00770       }
00771   }
00772 };
00773 
00774 void read_po_file(Dictionary& dict_, std::istream& in)
00775 {
00776   POFileReader reader(in, dict_);
00777 }
00778 
00779 } // namespace TinyGetText
00780 
00781 /* EOF */