Rewrite the diff parser and reduce the memory footprint.

The diff parser code was rewritten for clarity and speed and now handles
a couple of ugly cornercases, like SVN's property change output and single
change chunks, much better. Since the path parsing was unified as well,
the SCM interface gained a new method `getPathStripLevel()` which determines
how many path components need to be shoven off a file name for the SCM
to form a valid path in the workspace (similar to patch(1)'s --strip option).
Fixes issue 627. Automated tests follow.
This commit is contained in:
Patrick Georgi 2011-03-16 23:50:41 +01:00 committed by Thomas Keller
parent 6fb9b72e22
commit baa88412b9
7 changed files with 105 additions and 147 deletions

View File

@ -52,6 +52,7 @@
- Disable browser autocomplete of password fields in the account settings (issue 616)
- Improvements in the automatic linker parser (issue 618)
- The `createIssue` API method did not check the API authentication (issue 619)
- Reduce the memory footprint and compatibility of the internal diff parser (issue 627)
- Print active git branch heads and tags in bold
## Documentation

View File

@ -27,16 +27,14 @@
*/
class IDF_Diff
{
public $repo = '';
public $diff = '';
public $path_strip_level = 0;
protected $lines = array();
public $files = array();
public function __construct($diff, $repo='')
public function __construct($diff, $path_strip_level = 0)
{
$this->repo = $repo;
$this->diff = $diff;
$this->path_strip_level = $path_strip_level;
$this->lines = preg_split("/\015\012|\015|\012/", $diff);
}
@ -49,118 +47,90 @@ class IDF_Diff
$files = array();
$indiff = false; // Used to skip the headers in the git patches
$i = 0; // Used to skip the end of a git patch with --\nversion number
foreach ($this->lines as $line) {
$diffsize = count($this->lines);
while ($i < $diffsize) {
// look for the potential beginning of a diff
if (substr($this->lines[$i], 0, 4) !== '--- ') {
$i++;
if (0 === strpos($line, '--') and isset($this->lines[$i])
and preg_match('/^\d+\.\d+\.\d+\.\d+$/', $this->lines[$i])) {
continue;
}
// we're inside a diff candiate
$oldfileline = $this->lines[$i++];
$newfileline = $this->lines[$i++];
if (substr($newfileline, 0, 4) !== '+++ ') {
// not a valid diff here, move on
continue;
}
// use new file name by default
preg_match("/^\+\+\+ ([^\t]+)/", $newfileline, $m);
$current_file = $m[1];
if ($current_file === '/dev/null') {
// except if it's /dev/null, use the old one instead
// eg. mtn 0.48 and newer
preg_match("/^--- ([^\t]+)/", $oldfileline, $m);
$current_file = $m[1];
}
if ($this->path_strip_level > 0) {
$current_file = array_pop(explode('/', $current_file, $this->path_strip_level+1));
}
$current_chunk = 0;
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
while ($i < $diffsize && substr($this->lines[$i], 0, 3) === '@@ ') {
$elems = preg_match('/@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@.*/',
$this->lines[$i++], $results);
if ($elems != 1) {
// hunk is badly formatted
break;
}
if (0 === strpos($line, 'diff --git a')) {
$current_file = self::getFile($line);
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
} else if (preg_match('#^diff -r [^\s]+ -r [^\s]+ (.+)$#', $line, $matches)) {
$current_file = $matches[1];
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
} else if (!$indiff && 0 === strpos($line, '=========')) {
// ignore pseudo stanzas with a hint of a binary file
if (preg_match("/^# (.+) is binary/", $this->lines[$i]))
continue;
// by default always use the new name of a possibly renamed file
$current_file = self::getMtnFile($this->lines[$i+1]);
// mtn 0.48 and newer set /dev/null as file path for dropped files
// so we display the old name here
if ($current_file == "/dev/null") {
$current_file = self::getMtnFile($this->lines[$i]);
}
if ($current_file == "/dev/null") {
throw new Exception(
"could not determine path from diff"
);
}
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
} else if (0 === strpos($line, 'Index: ')) {
$current_file = self::getSvnFile($line);
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
}
if (!$indiff) {
continue;
}
if (0 === strpos($line, '@@ ')) {
$files[$current_file]['chunks_def'][] = self::getChunk($line);
$delstart = $results[1];
$dellines = $results[2] === '' ? 1 : $results[2];
$addstart = $results[3];
$addlines = $results[4] === '' ? 1 : $results[4];
$chunks_def = array(array($delstart), array($addstart));
if ($results[2] != '') $chunks_def[0][] = $dellines;
if ($results[4] != '') $chunks_def[1][] = $addlines;
$files[$current_file]['chunks_def'][] = $chunks_def;
$files[$current_file]['chunks'][] = array();
while ($addlines >= 0 || $dellines >= 0) {
$linetype = $this->lines[$i] != '' ? $this->lines[$i][0] : ' ';
switch ($linetype) {
case ' ':
$files[$current_file]['chunks'][$current_chunk][] =
array($delstart, $addstart, substr($this->lines[$i++], 1));
$dellines--;
$addlines--;
$delstart++;
$addstart++;
break;
case '+':
$files[$current_file]['chunks'][$current_chunk][] =
array('', $addstart, substr($this->lines[$i++], 1));
$addlines--;
$addstart++;
break;
case '-':
$files[$current_file]['chunks'][$current_chunk][] =
array($delstart, '', substr($this->lines[$i++], 1));
$dellines--;
$delstart++;
break;
default:
break 2;
}
}
$current_chunk++;
$lline = $files[$current_file]['chunks_def'][$current_chunk-1][0][0];
$rline = $files[$current_file]['chunks_def'][$current_chunk-1][1][0];
continue;
}
if (0 === strpos($line, '---') or 0 === strpos($line, '+++')) {
continue;
}
if (0 === strpos($line, '-')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, '', substr($line, 1));
$lline++;
continue;
}
if (0 === strpos($line, '+')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array('', $rline, substr($line, 1));
$rline++;
continue;
}
if (0 === strpos($line, ' ')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, substr($line, 1));
$rline++;
$lline++;
continue;
}
if ($line == '') {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, $line);
$rline++;
$lline++;
continue;
}
}
$this->files = $files;
return $files;
}
public static function getFile($line)
{
$line = substr(trim($line), 10);
$n = (int) strlen($line)/2;
return trim(substr($line, 3, $n-3));
}
public static function getSvnFile($line)
{
return substr(trim($line), 7);
}
public static function getMtnFile($line)
{
preg_match("/^[+-]{3} ([^\t]+)/", $line, $m);
return $m[1];
}
/**
* Return the html version of a parsed diff.
*/
@ -197,7 +167,6 @@ class IDF_Diff
return Pluf_Template::markSafe($out);
}
public static function padLine($line)
{
$line = str_replace("\t", ' ', $line);
@ -210,19 +179,6 @@ class IDF_Diff
return str_repeat('&nbsp;', $i).substr($line, $i);
}
/**
* @return array array(array(start, n), array(start, n))
*/
public static function getChunk($line)
{
$elts = explode(' ', $line);
$res = array();
for ($i=1;$i<3;$i++) {
$res[] = explode(',', trim(substr($elts[$i], 1)));
}
return $res;
}
/**
* Review patch.
*
@ -347,7 +303,6 @@ class IDF_Diff
return $nnew_chunks;
}
public function renderCompared($chunks, $filename)
{
$fileinfo = IDF_FileUtil::getMimeType($filename);
@ -381,6 +336,5 @@ class IDF_Diff
$i++;
}
return Pluf_Template::markSafe($out);
}
}

View File

@ -473,5 +473,14 @@ class IDF_Scm
{
return str_replace('%2F', '/', rawurlencode($path));
}
/**
* Returns the number of slashes and preceeding path components
* that should be stripped from paths in the SCM's diff output
*/
public function getDiffPathStripLevel()
{
return 0;
}
}

View File

@ -649,6 +649,14 @@ class IDF_Scm_Git extends IDF_Scm
return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip');
}
/**
* @see IDF_Scm::getDiffPathStripLevel()
*/
public function getDiffPathStripLevel()
{
return 1;
}
/*
* =====================================================
* Specific Git Commands

View File

@ -464,4 +464,12 @@ class IDF_Scm_Mercurial extends IDF_Scm
escapeshellarg($commit));
return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip');
}
/**
* @see IDF_Scm::getDiffPathStripLevel()
*/
public function getDiffPathStripLevel()
{
return 1;
}
}

View File

@ -32,29 +32,6 @@ class IDF_Tests_TestDiff extends UnitTestCase
parent::__construct('Test the diff parser.');
}
public function testGetFile()
{
$lines = array(
'diff --git a/src/IDF/Form/Register.php b/src/IDF/Form/Register.php',
'diff --git a/src/IDF/Form/RegisterConfirmation.php b/src/IDF/Form/RegisterConfirmation.php',
'diff --git a/src/IDF/Form/RegisterInputKey.php b/src/IDF/Form/RegisterInputKey.php',
'diff --git a/src/IDF/Views.php b/src/IDF/Views.php',
'diff --git a/src/IDF/conf/views.php b/src/IDF/conf/views.php',
);
$files = array(
'src/IDF/Form/Register.php',
'src/IDF/Form/RegisterConfirmation.php',
'src/IDF/Form/RegisterInputKey.php',
'src/IDF/Views.php',
'src/IDF/conf/views.php',
);
$i = 0;
foreach ($lines as $line) {
$this->assertEqual($files[$i], IDF_Diff::getFile($line));
$i++;
}
}
public function testBinaryDiff()
{
$diff_content = file_get_contents(dirname(__FILE__).'/test-diff.diff');

View File

@ -303,7 +303,8 @@ class IDF_Views_Source
$title = sprintf(__('%s Commit Details'), (string) $request->project);
$page_title = sprintf(__('%s Commit Details - %s'), (string) $request->project, $commit);
$rcommit = IDF_Commit::getOrAdd($cobject, $request->project);
$diff = new IDF_Diff($cobject->diff);
$diff = new IDF_Diff($cobject->diff, $scm->getDiffPathStripLevel());
$cobject->diff = null;
$diff->parse();
$scmConf = $request->conf->getVal('scm', 'git');
try {