Fixed to no longer assume that the first line of a commit message sets the encoding.

This commit is contained in:
Loic d'Anterroches 2010-03-26 19:57:18 +01:00
parent a5f97c59d9
commit 3c29e4e6ae
2 changed files with 29 additions and 18 deletions

View File

@ -151,7 +151,8 @@ class IDF_Commit extends Pluf_Model
$commit = new IDF_Commit(); $commit = new IDF_Commit();
$commit->project = $project; $commit->project = $project;
$commit->scm_id = $change->commit; $commit->scm_id = $change->commit;
list($commit->summary, $commit->fullmessage) = self::toUTF8(array($change->title, $change->full_message)); $commit->summary = self::toUTF8($change->title);
$commit->fullmessage = self::toUTF8($change->full_message);
$commit->author = $scm->findAuthor($change->author); $commit->author = $scm->findAuthor($change->author);
$commit->origauthor = $change->author; $commit->origauthor = $change->author;
$commit->creation_dtime = $change->date; $commit->creation_dtime = $change->date;
@ -167,17 +168,18 @@ class IDF_Commit extends Pluf_Model
* first value and then used to convert all the strings. * first value and then used to convert all the strings.
* *
* @param mixed String or array of string to be converted * @param mixed String or array of string to be converted
* @return mixed String or array of string * @param bool Returns the encoding together with the converted text (false)
* @return mixed String or array of strings, or array(converted, encoding) when the encoding is requested
*/ */
public static function toUTF8($text) public static function toUTF8($text, $get_encoding=False)
{ {
$enc = 'ASCII, UTF-8, ISO-8859-2, ISO-8859-1, JIS, EUC-JP, SJIS'; $enc = 'ASCII, UTF-8, ISO-8859-1, JIS, EUC-JP, SJIS';
$ref = $text; $ref = $text;
if (is_array($text)) { if (is_array($text)) {
$ref = $text[0]; $ref = $text[0];
} }
if (Pluf_Text_UTF8::check($ref)) { if (Pluf_Text_UTF8::check($ref)) {
return $text; return (!$get_encoding) ? $text : array($text, 'UTF-8');
} }
$encoding = mb_detect_encoding($ref, $enc, true); $encoding = mb_detect_encoding($ref, $enc, true);
if ($encoding == false) { if ($encoding == false) {
@ -187,9 +189,10 @@ class IDF_Commit extends Pluf_Model
foreach ($text as $t) { foreach ($text as $t) {
$res[] = mb_convert_encoding($t, 'UTF-8', $encoding); $res[] = mb_convert_encoding($t, 'UTF-8', $encoding);
} }
return $res; return (!$get_encoding) ? $res : array($res, $encoding);
} else { } else {
return mb_convert_encoding($text, 'UTF-8', $encoding); $res = mb_convert_encoding($text, 'UTF-8', $encoding);
return (!$get_encoding) ? $res : array($res, $encoding);
} }
} }

View File

@ -48,15 +48,16 @@ class IDF_Tests_TestGit extends UnitTestCase
$log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-1.txt')); $log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-1.txt'));
$log = IDF_Scm_Git::parseLog($log_lines); $log = IDF_Scm_Git::parseLog($log_lines);
$titles = array( $titles = array(
'Quick Profiler entfernt', array('Quick Profiler entfernt', 'UTF-8'),
'Anwendungsmenu Divider eingefügt', array('Anwendungsmenu Divider eingefügt', 'ISO-8859-1'),
'Anwendungen aufäumen' array('Anwendungen aufäumen', 'ISO-8859-1'),
); );
foreach ($log as $change) { foreach ($log as $change) {
$this->assertEqual(array_shift($titles), list($title, $senc) = array_shift($titles);
IDF_Commit::toUTF8($change->title)); list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true);
$this->assertEqual($title, $conv);
$this->assertEqual($senc, $encoding);
} }
} }
/** /**
@ -67,13 +68,20 @@ class IDF_Tests_TestGit extends UnitTestCase
$log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-2.txt')); $log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-2.txt'));
$log = IDF_Scm_Git::parseLog($log_lines); $log = IDF_Scm_Git::parseLog($log_lines);
$titles = array( $titles = array(
'Dodałem model', array('Doda³em model','ISO-8859-1'),
'Dodałem model', array('Doda³em model','ISO-8859-1'),
// The correct result is 'Dodałem model'. The
// problem is that, in this case, one cannot
// distinguish between latin1 and latin2. We
// will need to add a way for the project
// admin to set the priority between the
// encodings.
); );
foreach ($log as $change) { foreach ($log as $change) {
$this->assertEqual(array_shift($titles), list($title, $senc) = array_shift($titles);
IDF_Commit::toUTF8($change->title)); list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true);
} $this->assertEqual($title, $conv);
$this->assertEqual($senc, $encoding);
}
} }
} }