Public Member Functions | |
POFileReader (std::istream &in_, Dictionary &dict_) | |
void | parse_header (const std::string &header) |
void | nextChar () |
void | skipSpace () |
bool | expectToken (std::string type, Token wanted) |
bool | expectContent (std::string type, std::string wanted) |
void | tokenize_po () |
Token | nextToken () |
Private Types | |
enum | Token { TOKEN_KEYWORD, TOKEN_CONTENT, TOKEN_EOF } |
Private Attributes | |
std::istream & | in |
Dictionary & | dict |
std::string | from_charset |
std::string | to_charset |
int | line_num |
int | c |
Token | token |
std::string | tokenContent |
Definition at line 541 of file tinygettext.cpp.
enum TinyGetText::POFileReader::Token [private] |
Definition at line 552 of file tinygettext.cpp.
/** Kinds of token that nextToken() hands to the .po parser. */
enum Token {
  TOKEN_KEYWORD, ///< msgstr, msgid, etc.
  TOKEN_CONTENT, ///< string literals, concatenated ("" "foo\n" "bar\n" -> "foo\nbar\n")
  TOKEN_EOF      ///< ran out of tokens
};
TinyGetText::POFileReader::POFileReader | ( | std::istream & | in_, | |
Dictionary & | dict_ | |||
) | [inline] |
Definition at line 561 of file tinygettext.cpp.
References c, line_num, nextChar(), and tokenize_po().
00562 : in(in_), dict(dict_) 00563 { 00564 line_num = 0; 00565 nextChar(); 00566 if(c == 0xef) { // skip UTF-8 intro that some text editors produce 00567 nextChar(); 00568 nextChar(); 00569 nextChar(); 00570 } 00571 tokenize_po(); 00572 }
void TinyGetText::POFileReader::parse_header | ( | const std::string & | header | ) | [inline] |
Definition at line 574 of file tinygettext.cpp.
References dict, from_charset, TinyGetText::Dictionary::get_charset(), TinyGetText::has_prefix(), log_warning, TinyGetText::Dictionary::set_charset(), and to_charset.
Referenced by tokenize_po().
00575 { 00576 // Separate the header in lines 00577 typedef std::vector<std::string> Lines; 00578 Lines lines; 00579 00580 std::string::size_type start = 0; 00581 for(std::string::size_type i = 0; i < header.length(); ++i) 00582 { 00583 if (header[i] == '\n') 00584 { 00585 lines.push_back(header.substr(start, i - start)); 00586 start = i+1; 00587 } 00588 } 00589 00590 for(Lines::iterator i = lines.begin(); i != lines.end(); ++i) 00591 { 00592 if (has_prefix(*i, "Content-Type: text/plain; charset=")) { 00593 from_charset = i->substr(strlen("Content-Type: text/plain; charset=")); 00594 } 00595 } 00596 00597 if (from_charset.empty() || from_charset == "CHARSET") 00598 { 00599 log_warning << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl; 00600 from_charset = "ISO-8859-1"; 00601 } 00602 00603 to_charset = dict.get_charset(); 00604 if (to_charset.empty()) 00605 { // No charset requested from the dict, use utf-8 00606 to_charset = "utf-8"; 00607 dict.set_charset(from_charset); 00608 } 00609 }
void TinyGetText::POFileReader::nextChar | ( | ) | [inline] |
Definition at line 611 of file tinygettext.cpp.
References c, in, and line_num.
Referenced by nextToken(), POFileReader(), and skipSpace().
void TinyGetText::POFileReader::skipSpace | ( | ) | [inline] |
Definition at line 618 of file tinygettext.cpp.
References c, and nextChar().
Referenced by nextToken().
00619 { 00620 if(c == EOF) 00621 return; 00622 00623 while(c == '#' || isspace(static_cast<unsigned char>(c))) { 00624 if(c == '#') { 00625 while(c != '\n' && c != EOF) nextChar(); 00626 } 00627 nextChar(); 00628 } 00629 }
bool TinyGetText::POFileReader::expectToken | ( | std::string | type, | |
Token | wanted | |||
) | [inline] |
Definition at line 631 of file tinygettext.cpp.
References line_num, log_warning, token, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.
Referenced by tokenize_po().
00631 { 00632 if(token != wanted) { 00633 log_warning << "Expected " << type << ", got "; 00634 if(token == TOKEN_EOF) 00635 log_warning << "EOF"; 00636 else if(token == TOKEN_KEYWORD) 00637 log_warning << "keyword '" << tokenContent << "'"; 00638 else 00639 log_warning << "string \"" << tokenContent << '"'; 00640 00641 log_warning << " at line " << line_num << std::endl; 00642 return false; 00643 } 00644 return true; 00645 }
bool TinyGetText::POFileReader::expectContent | ( | std::string | type, | |
std::string | wanted | |||
) | [inline] |
Definition at line 647 of file tinygettext.cpp.
References line_num, log_warning, token, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.
Referenced by tokenize_po().
00647 { 00648 if(tokenContent != wanted) { 00649 log_warning << "Expected " << type << ", got "; 00650 if(token == TOKEN_EOF) 00651 log_warning << "EOF"; 00652 else if(token == TOKEN_KEYWORD) 00653 log_warning << "keyword '" << tokenContent << "'"; 00654 else 00655 log_warning << "string \"" << tokenContent << '"'; 00656 00657 log_warning << " at line " << line_num << std::endl; 00658 return false; 00659 } 00660 return true; 00661 }
void TinyGetText::POFileReader::tokenize_po | ( | ) | [inline] |
Definition at line 663 of file tinygettext.cpp.
References TinyGetText::Dictionary::add_translation(), TinyGetText::convert(), dict, expectContent(), expectToken(), from_charset, TinyGetText::has_prefix(), log_warning, nextToken(), parse_header(), to_charset, token, TOKEN_CONTENT, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.
Referenced by POFileReader().
00664 { 00665 token = nextToken(); 00666 while(token != TOKEN_EOF) 00667 { 00668 if(!expectToken("'msgid' keyword", TOKEN_KEYWORD) || !expectContent("'msgid' keyword", "msgid")) break; 00669 00670 token = nextToken(); 00671 if(!expectToken("name after msgid", TOKEN_CONTENT)) break; 00672 std::string current_msgid = tokenContent; 00673 00674 token = nextToken(); 00675 if(!expectToken("msgstr or msgid_plural", TOKEN_KEYWORD)) break; 00676 if(tokenContent == "msgid_plural") 00677 { 00678 //Plural form 00679 token = nextToken(); 00680 if(!expectToken("msgid_plural content", TOKEN_CONTENT)) break; 00681 std::string current_msgid_plural = tokenContent; 00682 00683 std::map<int, std::string> msgstr_plural; 00684 while((token = nextToken()) == TOKEN_KEYWORD && has_prefix(tokenContent, "msgstr[")) 00685 { 00686 int num; 00687 if (sscanf(tokenContent.c_str(), "msgstr[%d]", &num) != 1) 00688 { 00689 log_warning << "Error: Couldn't parse: " << tokenContent << std::endl; 00690 } 00691 00692 token = nextToken(); 00693 if(!expectToken("msgstr[x] content", TOKEN_CONTENT)) break; 00694 msgstr_plural[num] = convert(tokenContent, from_charset, to_charset); 00695 } 00696 dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural); 00697 // No nextToken() 00698 } 00699 else 00700 { 00701 // "Ordinary" translation 00702 if(!expectContent("'msgstr' keyword", "msgstr")) break; 00703 00704 token = nextToken(); 00705 if(!expectToken("translation in msgstr", TOKEN_CONTENT)) break; 00706 00707 if (current_msgid == "") 00708 { // .po Header is hidden in the msgid with the empty string 00709 parse_header(tokenContent); 00710 } 00711 else 00712 { 00713 dict.add_translation(current_msgid, convert(tokenContent, from_charset, to_charset)); 00714 } 00715 token = nextToken(); 00716 } 00717 } 00718 }
Token TinyGetText::POFileReader::nextToken | ( | ) | [inline] |
Definition at line 720 of file tinygettext.cpp.
References c, log_warning, nextChar(), skipSpace(), TOKEN_CONTENT, TOKEN_EOF, TOKEN_KEYWORD, and tokenContent.
Referenced by tokenize_po().
00721 { 00722 //Clear token contents 00723 tokenContent = ""; 00724 00725 skipSpace(); 00726 00727 if(c == EOF) 00728 return TOKEN_EOF; 00729 else if(c != '"') 00730 { 00731 // Read a keyword 00732 do { 00733 tokenContent += c; 00734 nextChar(); 00735 } while(c != EOF && !isspace(static_cast<unsigned char>(c))); 00736 return TOKEN_KEYWORD; 00737 } 00738 else 00739 { 00740 do { 00741 nextChar(); 00742 // Read content 00743 while(c != EOF && c != '"') { 00744 if (c == '\\') { 00745 nextChar(); 00746 if (c == 'n') c = '\n'; 00747 else if (c == 't') c = '\t'; 00748 else if (c == 'r') c = '\r'; 00749 else if (c == '"') c = '"'; 00750 else if (c == '\\') c = '\\'; 00751 else 00752 { 00753 log_warning << "Unhandled escape character: " << char(c) << std::endl; 00754 c = ' '; 00755 } 00756 } 00757 tokenContent += c; 00758 nextChar(); 00759 } 00760 if(c == EOF) { 00761 log_warning << "Unclosed string literal: " << tokenContent << std::endl; 00762 return TOKEN_CONTENT; 00763 } 00764 00765 // Read more strings? 00766 nextChar(); 00767 skipSpace(); 00768 } while(c == '"'); 00769 return TOKEN_CONTENT; 00770 } 00771 }
std::istream& TinyGetText::POFileReader::in [private] |
Dictionary& TinyGetText::POFileReader::dict [private] |
std::string TinyGetText::POFileReader::from_charset [private] |
std::string TinyGetText::POFileReader::to_charset [private] |
int TinyGetText::POFileReader::line_num [private] |
Definition at line 550 of file tinygettext.cpp.
Referenced by expectContent(), expectToken(), nextChar(), and POFileReader().
int TinyGetText::POFileReader::c [private] |
Definition at line 551 of file tinygettext.cpp.
Referenced by nextChar(), nextToken(), POFileReader(), and skipSpace().
Token TinyGetText::POFileReader::token [private] |
Definition at line 557 of file tinygettext.cpp.
Referenced by expectContent(), expectToken(), and tokenize_po().
std::string TinyGetText::POFileReader::tokenContent [private] |
Definition at line 558 of file tinygettext.cpp.
Referenced by expectContent(), expectToken(), nextToken(), and tokenize_po().