Fixed to no longer rely on the assumption that the first line of a commit message sets the encoding.

This commit is contained in:
Loic d'Anterroches 2010-03-26 19:57:18 +01:00
parent a5f97c59d9
commit 3c29e4e6ae
2 changed files with 29 additions and 18 deletions

View File

@ -151,7 +151,8 @@ class IDF_Commit extends Pluf_Model
$commit = new IDF_Commit();
$commit->project = $project;
$commit->scm_id = $change->commit;
list($commit->summary, $commit->fullmessage) = self::toUTF8(array($change->title, $change->full_message));
$commit->summary = self::toUTF8($change->title);
$commit->fullmessage = self::toUTF8($change->full_message);
$commit->author = $scm->findAuthor($change->author);
$commit->origauthor = $change->author;
$commit->creation_dtime = $change->date;
@ -167,17 +168,18 @@ class IDF_Commit extends Pluf_Model
* first value and then used to convert all the strings.
*
* @param mixed String or array of string to be converted
* @return mixed String or array of string
* @param bool Returns the encoding together with the converted text (false)
* @return mixed String or array of string or array of res + encoding
*/
public static function toUTF8($text)
public static function toUTF8($text, $get_encoding=False)
{
$enc = 'ASCII, UTF-8, ISO-8859-2, ISO-8859-1, JIS, EUC-JP, SJIS';
$enc = 'ASCII, UTF-8, ISO-8859-1, JIS, EUC-JP, SJIS';
$ref = $text;
if (is_array($text)) {
$ref = $text[0];
}
if (Pluf_Text_UTF8::check($ref)) {
return $text;
return (!$get_encoding) ? $text : array($text, 'UTF-8');
}
$encoding = mb_detect_encoding($ref, $enc, true);
if ($encoding == false) {
@ -187,9 +189,10 @@ class IDF_Commit extends Pluf_Model
foreach ($text as $t) {
$res[] = mb_convert_encoding($t, 'UTF-8', $encoding);
}
return $res;
return (!$get_encoding) ? $res : array($res, $encoding);
} else {
return mb_convert_encoding($text, 'UTF-8', $encoding);
$res = mb_convert_encoding($text, 'UTF-8', $encoding);
return (!$get_encoding) ? $res : array($res, $encoding);
}
}

View File

@ -48,15 +48,16 @@ class IDF_Tests_TestGit extends UnitTestCase
$log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-1.txt'));
$log = IDF_Scm_Git::parseLog($log_lines);
$titles = array(
'Quick Profiler entfernt',
'Anwendungsmenu Divider eingefügt',
'Anwendungen aufäumen'
array('Quick Profiler entfernt', 'UTF-8'),
array('Anwendungsmenu Divider eingefügt', 'ISO-8859-1'),
array('Anwendungen aufäumen', 'ISO-8859-1'),
);
foreach ($log as $change) {
$this->assertEqual(array_shift($titles),
IDF_Commit::toUTF8($change->title));
list($title, $senc) = array_shift($titles);
list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true);
$this->assertEqual($title, $conv);
$this->assertEqual($senc, $encoding);
}
}
/**
@ -67,13 +68,20 @@ class IDF_Tests_TestGit extends UnitTestCase
$log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-2.txt'));
$log = IDF_Scm_Git::parseLog($log_lines);
$titles = array(
'Dodałem model',
'Dodałem model',
array('Doda³em model','ISO-8859-1'),
array('Doda³em model','ISO-8859-1'),
// The correct result is 'Dodałem model'; the
// problem is that, in this case, one cannot
// distinguish between latin1 and latin2. We
// will need to add a way for the project
// admin to set the priority between the
// encodings.
);
foreach ($log as $change) {
$this->assertEqual(array_shift($titles),
IDF_Commit::toUTF8($change->title));
}
list($title, $senc) = array_shift($titles);
list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true);
$this->assertEqual($title, $conv);
$this->assertEqual($senc, $encoding);
}
}
}