diff --git a/src/IDF/Commit.php b/src/IDF/Commit.php index ba4a1e2..6fa1188 100644 --- a/src/IDF/Commit.php +++ b/src/IDF/Commit.php @@ -151,7 +151,8 @@ class IDF_Commit extends Pluf_Model $commit = new IDF_Commit(); $commit->project = $project; $commit->scm_id = $change->commit; - list($commit->summary, $commit->fullmessage) = self::toUTF8(array($change->title, $change->full_message)); + $commit->summary = self::toUTF8($change->title); + $commit->fullmessage = self::toUTF8($change->full_message); $commit->author = $scm->findAuthor($change->author); $commit->origauthor = $change->author; $commit->creation_dtime = $change->date; @@ -167,17 +168,18 @@ class IDF_Commit extends Pluf_Model * first value and then used to convert all the strings. * * @param mixed String or array of string to be converted - * @return mixed String or array of string + * @param bool Returns the encoding together with the converted text (false) + * @return mixed String or array of string or array of res + encoding */ - public static function toUTF8($text) + public static function toUTF8($text, $get_encoding=False) { - $enc = 'ASCII, UTF-8, ISO-8859-2, ISO-8859-1, JIS, EUC-JP, SJIS'; + $enc = 'ASCII, UTF-8, ISO-8859-1, JIS, EUC-JP, SJIS'; $ref = $text; if (is_array($text)) { $ref = $text[0]; } if (Pluf_Text_UTF8::check($ref)) { - return $text; + return (!$get_encoding) ? $text : array($text, 'UTF-8'); } $encoding = mb_detect_encoding($ref, $enc, true); if ($encoding == false) { @@ -187,9 +189,10 @@ class IDF_Commit extends Pluf_Model foreach ($text as $t) { $res[] = mb_convert_encoding($t, 'UTF-8', $encoding); } - return $res; + return (!$get_encoding) ? $res : array($res, $encoding); } else { - return mb_convert_encoding($text, 'UTF-8', $encoding); + $res = mb_convert_encoding($text, 'UTF-8', $encoding); + return (!$get_encoding) ? $res : array($res, $encoding); } } diff --git a/src/IDF/Tests/TestGit.php b/src/IDF/Tests/TestGit.php index c7557c1..ca0affc 100644 --- a/src/IDF/Tests/TestGit.php +++ b/src/IDF/Tests/TestGit.php @@ -48,15 +48,16 @@ class IDF_Tests_TestGit extends UnitTestCase $log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-1.txt')); $log = IDF_Scm_Git::parseLog($log_lines); $titles = array( - 'Quick Profiler entfernt', - 'Anwendungsmenu Divider eingefügt', - 'Anwendungen aufäumen' + array('Quick Profiler entfernt', 'UTF-8'), + array('Anwendungsmenu Divider eingefügt', 'ISO-8859-1'), + array('Anwendungen aufäumen', 'ISO-8859-1'), ); foreach ($log as $change) { - $this->assertEqual(array_shift($titles), - IDF_Commit::toUTF8($change->title)); + list($title, $senc) = array_shift($titles); + list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true); + $this->assertEqual($title, $conv); + $this->assertEqual($senc, $encoding); } - } /** @@ -67,13 +68,20 @@ class IDF_Tests_TestGit extends UnitTestCase $log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-2.txt')); $log = IDF_Scm_Git::parseLog($log_lines); $titles = array( - 'Dodałem model', - 'Dodałem model', + array('Doda³em model','ISO-8859-1'), + array('Doda³em model','ISO-8859-1'), + // The Good result is 'Dodałem model', the + // problem is that in that case, one cannot + // distinguish between latin1 and latin2. We + // will need to add a way for the project + // admin to set the priority between the + // encodings. ); foreach ($log as $change) { - $this->assertEqual(array_shift($titles), - IDF_Commit::toUTF8($change->title)); + list($title, $senc) = array_shift($titles); + list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true); + $this->assertEqual($title, $conv); + $this->assertEqual($senc, $encoding); } - } }