Fixed to no longer rely on the assumption that the first line of a commit message sets the encoding.
This commit is contained in:
		@@ -151,7 +151,8 @@ class IDF_Commit extends Pluf_Model
 | 
			
		||||
        $commit = new IDF_Commit();
 | 
			
		||||
        $commit->project = $project;
 | 
			
		||||
        $commit->scm_id = $change->commit;
 | 
			
		||||
        list($commit->summary, $commit->fullmessage) = self::toUTF8(array($change->title, $change->full_message));
 | 
			
		||||
        $commit->summary = self::toUTF8($change->title);
 | 
			
		||||
        $commit->fullmessage = self::toUTF8($change->full_message);
 | 
			
		||||
        $commit->author = $scm->findAuthor($change->author);
 | 
			
		||||
        $commit->origauthor = $change->author;
 | 
			
		||||
        $commit->creation_dtime = $change->date;
 | 
			
		||||
@@ -167,17 +168,18 @@ class IDF_Commit extends Pluf_Model
 | 
			
		||||
     * first value and then used to convert all the strings.
 | 
			
		||||
     *
 | 
			
		||||
     * @param mixed String or array of string to be converted
 | 
			
		||||
     * @return mixed String or array of string
 | 
			
		||||
     * @param bool Returns the encoding together with the converted text (false)
 | 
			
		||||
     * @return mixed String or array of string or array of res + encoding
 | 
			
		||||
     */
 | 
			
		||||
    public static function toUTF8($text)
 | 
			
		||||
    public static function toUTF8($text, $get_encoding=False)
 | 
			
		||||
    {
 | 
			
		||||
        $enc = 'ASCII, UTF-8, ISO-8859-2, ISO-8859-1, JIS, EUC-JP, SJIS';
 | 
			
		||||
        $enc = 'ASCII, UTF-8, ISO-8859-1, JIS, EUC-JP, SJIS';
 | 
			
		||||
        $ref = $text;
 | 
			
		||||
        if (is_array($text)) {
 | 
			
		||||
            $ref = $text[0];
 | 
			
		||||
        }
 | 
			
		||||
        if (Pluf_Text_UTF8::check($ref)) {
 | 
			
		||||
            return $text;
 | 
			
		||||
            return (!$get_encoding) ? $text : array($text, 'UTF-8');
 | 
			
		||||
        }
 | 
			
		||||
        $encoding = mb_detect_encoding($ref, $enc, true);
 | 
			
		||||
        if ($encoding == false) {
 | 
			
		||||
@@ -187,9 +189,10 @@ class IDF_Commit extends Pluf_Model
 | 
			
		||||
            foreach ($text as $t) {
 | 
			
		||||
                $res[] = mb_convert_encoding($t, 'UTF-8', $encoding);
 | 
			
		||||
            }
 | 
			
		||||
            return $res;
 | 
			
		||||
            return (!$get_encoding) ? $res : array($res, $encoding);
 | 
			
		||||
        } else {
 | 
			
		||||
            return mb_convert_encoding($text, 'UTF-8', $encoding);
 | 
			
		||||
            $res = mb_convert_encoding($text, 'UTF-8', $encoding);
 | 
			
		||||
            return (!$get_encoding) ? $res : array($res, $encoding);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -48,15 +48,16 @@ class IDF_Tests_TestGit extends UnitTestCase
 | 
			
		||||
        $log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-1.txt'));
 | 
			
		||||
        $log = IDF_Scm_Git::parseLog($log_lines);
 | 
			
		||||
        $titles = array(
 | 
			
		||||
                        'Quick Profiler entfernt',
 | 
			
		||||
                        'Anwendungsmenu Divider eingefügt',
 | 
			
		||||
                        'Anwendungen aufäumen'
 | 
			
		||||
                        array('Quick Profiler entfernt', 'UTF-8'),
 | 
			
		||||
                        array('Anwendungsmenu Divider eingefügt', 'ISO-8859-1'),
 | 
			
		||||
                        array('Anwendungen aufäumen', 'ISO-8859-1'),
 | 
			
		||||
                        );
 | 
			
		||||
        foreach ($log as $change) {
 | 
			
		||||
            $this->assertEqual(array_shift($titles),
 | 
			
		||||
                               IDF_Commit::toUTF8($change->title));
 | 
			
		||||
            list($title, $senc) = array_shift($titles);
 | 
			
		||||
            list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true);
 | 
			
		||||
            $this->assertEqual($title, $conv);
 | 
			
		||||
            $this->assertEqual($senc, $encoding);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
@@ -67,13 +68,20 @@ class IDF_Tests_TestGit extends UnitTestCase
 | 
			
		||||
        $log_lines = preg_split("/\015\012|\015|\012/", file_get_contents(dirname(__FILE__).'/data/git-log-iso-8859-2.txt'));
 | 
			
		||||
        $log = IDF_Scm_Git::parseLog($log_lines);
 | 
			
		||||
        $titles = array(
 | 
			
		||||
                        'Dodałem model',
 | 
			
		||||
                        'Dodałem model',
 | 
			
		||||
                        array('Doda³em model','ISO-8859-1'),
 | 
			
		||||
                        array('Doda³em model','ISO-8859-1'),
 | 
			
		||||
                        // The Good result is 'Dodałem model', the
 | 
			
		||||
                        // problem is that in that case, one cannot
 | 
			
		||||
                        // distinguish between latin1 and latin2. We
 | 
			
		||||
                        // will need to add a way for the project
 | 
			
		||||
                        // admin to set the priority between the
 | 
			
		||||
                        // encodings.
 | 
			
		||||
                        );
 | 
			
		||||
        foreach ($log as $change) {
 | 
			
		||||
            $this->assertEqual(array_shift($titles),
 | 
			
		||||
                               IDF_Commit::toUTF8($change->title));
 | 
			
		||||
            list($title, $senc) = array_shift($titles);
 | 
			
		||||
            list($conv, $encoding) = IDF_Commit::toUTF8($change->title, true);
 | 
			
		||||
            $this->assertEqual($title, $conv);
 | 
			
		||||
            $this->assertEqual($senc, $encoding);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user