Rewrite the diff parser and reduce the memory footprint.

The diff parser code was rewritten for clarity and speed and now handles
a couple of ugly cornercases, like SVN's property change output and single
change chunks, much better. Since the path parsing was unified as well,
the SCM interface gained a new method `getPathStripLevel()` which determines
how many path components need to be shoven off a file name for the SCM
to form a valid path in the workspace (similar to patch(1)'s --strip option).
Fixes issue 627. Automated tests follow.
This commit is contained in:
Patrick Georgi 2011-03-16 23:50:41 +01:00 committed by Thomas Keller
parent 6fb9b72e22
commit baa88412b9
7 changed files with 105 additions and 147 deletions

View File

@ -52,6 +52,7 @@
- Disable browser autocomplete of password fields in the account settings (issue 616) - Disable browser autocomplete of password fields in the account settings (issue 616)
- Improvements in the automatic linker parser (issue 618) - Improvements in the automatic linker parser (issue 618)
- The `createIssue` API method did not check the API authentication (issue 619) - The `createIssue` API method did not check the API authentication (issue 619)
- Reduce the memory footprint and compatibility of the internal diff parser (issue 627)
- Print active git branch heads and tags in bold - Print active git branch heads and tags in bold
## Documentation ## Documentation

View File

@ -27,16 +27,14 @@
*/ */
class IDF_Diff class IDF_Diff
{ {
public $repo = ''; public $path_strip_level = 0;
public $diff = '';
protected $lines = array(); protected $lines = array();
public $files = array(); public $files = array();
public function __construct($diff, $repo='') public function __construct($diff, $path_strip_level = 0)
{ {
$this->repo = $repo; $this->path_strip_level = $path_strip_level;
$this->diff = $diff;
$this->lines = preg_split("/\015\012|\015|\012/", $diff); $this->lines = preg_split("/\015\012|\015|\012/", $diff);
} }
@ -49,118 +47,90 @@ class IDF_Diff
$files = array(); $files = array();
$indiff = false; // Used to skip the headers in the git patches $indiff = false; // Used to skip the headers in the git patches
$i = 0; // Used to skip the end of a git patch with --\nversion number $i = 0; // Used to skip the end of a git patch with --\nversion number
foreach ($this->lines as $line) { $diffsize = count($this->lines);
$i++; while ($i < $diffsize) {
if (0 === strpos($line, '--') and isset($this->lines[$i]) // look for the potential beginning of a diff
and preg_match('/^\d+\.\d+\.\d+\.\d+$/', $this->lines[$i])) { if (substr($this->lines[$i], 0, 4) !== '--- ') {
break; $i++;
continue;
} }
if (0 === strpos($line, 'diff --git a')) {
$current_file = self::getFile($line); // we're inside a diff candiate
$files[$current_file] = array(); $oldfileline = $this->lines[$i++];
$files[$current_file]['chunks'] = array(); $newfileline = $this->lines[$i++];
$files[$current_file]['chunks_def'] = array(); if (substr($newfileline, 0, 4) !== '+++ ') {
$current_chunk = 0; // not a valid diff here, move on
$indiff = true;
continue; continue;
} else if (preg_match('#^diff -r [^\s]+ -r [^\s]+ (.+)$#', $line, $matches)) { }
$current_file = $matches[1];
$files[$current_file] = array(); // use new file name by default
$files[$current_file]['chunks'] = array(); preg_match("/^\+\+\+ ([^\t]+)/", $newfileline, $m);
$files[$current_file]['chunks_def'] = array(); $current_file = $m[1];
$current_chunk = 0; if ($current_file === '/dev/null') {
$indiff = true; // except if it's /dev/null, use the old one instead
continue; // eg. mtn 0.48 and newer
} else if (!$indiff && 0 === strpos($line, '=========')) { preg_match("/^--- ([^\t]+)/", $oldfileline, $m);
// ignore pseudo stanzas with a hint of a binary file $current_file = $m[1];
if (preg_match("/^# (.+) is binary/", $this->lines[$i])) }
continue; if ($this->path_strip_level > 0) {
// by default always use the new name of a possibly renamed file $current_file = array_pop(explode('/', $current_file, $this->path_strip_level+1));
$current_file = self::getMtnFile($this->lines[$i+1]); }
// mtn 0.48 and newer set /dev/null as file path for dropped files $current_chunk = 0;
// so we display the old name here $files[$current_file] = array();
if ($current_file == "/dev/null") { $files[$current_file]['chunks'] = array();
$current_file = self::getMtnFile($this->lines[$i]); $files[$current_file]['chunks_def'] = array();
while ($i < $diffsize && substr($this->lines[$i], 0, 3) === '@@ ') {
$elems = preg_match('/@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@.*/',
$this->lines[$i++], $results);
if ($elems != 1) {
// hunk is badly formatted
break;
} }
if ($current_file == "/dev/null") { $delstart = $results[1];
throw new Exception( $dellines = $results[2] === '' ? 1 : $results[2];
"could not determine path from diff" $addstart = $results[3];
); $addlines = $results[4] === '' ? 1 : $results[4];
} $chunks_def = array(array($delstart), array($addstart));
$files[$current_file] = array(); if ($results[2] != '') $chunks_def[0][] = $dellines;
$files[$current_file]['chunks'] = array(); if ($results[4] != '') $chunks_def[1][] = $addlines;
$files[$current_file]['chunks_def'] = array(); $files[$current_file]['chunks_def'][] = $chunks_def;
$current_chunk = 0;
$indiff = true;
continue;
} else if (0 === strpos($line, 'Index: ')) {
$current_file = self::getSvnFile($line);
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
}
if (!$indiff) {
continue;
}
if (0 === strpos($line, '@@ ')) {
$files[$current_file]['chunks_def'][] = self::getChunk($line);
$files[$current_file]['chunks'][] = array(); $files[$current_file]['chunks'][] = array();
while ($addlines >= 0 || $dellines >= 0) {
$linetype = $this->lines[$i] != '' ? $this->lines[$i][0] : ' ';
switch ($linetype) {
case ' ':
$files[$current_file]['chunks'][$current_chunk][] =
array($delstart, $addstart, substr($this->lines[$i++], 1));
$dellines--;
$addlines--;
$delstart++;
$addstart++;
break;
case '+':
$files[$current_file]['chunks'][$current_chunk][] =
array('', $addstart, substr($this->lines[$i++], 1));
$addlines--;
$addstart++;
break;
case '-':
$files[$current_file]['chunks'][$current_chunk][] =
array($delstart, '', substr($this->lines[$i++], 1));
$dellines--;
$delstart++;
break;
default:
break 2;
}
}
$current_chunk++; $current_chunk++;
$lline = $files[$current_file]['chunks_def'][$current_chunk-1][0][0];
$rline = $files[$current_file]['chunks_def'][$current_chunk-1][1][0];
continue;
}
if (0 === strpos($line, '---') or 0 === strpos($line, '+++')) {
continue;
}
if (0 === strpos($line, '-')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, '', substr($line, 1));
$lline++;
continue;
}
if (0 === strpos($line, '+')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array('', $rline, substr($line, 1));
$rline++;
continue;
}
if (0 === strpos($line, ' ')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, substr($line, 1));
$rline++;
$lline++;
continue;
}
if ($line == '') {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, $line);
$rline++;
$lline++;
continue;
} }
} }
$this->files = $files; $this->files = $files;
return $files; return $files;
} }
public static function getFile($line)
{
$line = substr(trim($line), 10);
$n = (int) strlen($line)/2;
return trim(substr($line, 3, $n-3));
}
public static function getSvnFile($line)
{
return substr(trim($line), 7);
}
public static function getMtnFile($line)
{
preg_match("/^[+-]{3} ([^\t]+)/", $line, $m);
return $m[1];
}
/** /**
* Return the html version of a parsed diff. * Return the html version of a parsed diff.
*/ */
@ -197,7 +167,6 @@ class IDF_Diff
return Pluf_Template::markSafe($out); return Pluf_Template::markSafe($out);
} }
public static function padLine($line) public static function padLine($line)
{ {
$line = str_replace("\t", ' ', $line); $line = str_replace("\t", ' ', $line);
@ -210,19 +179,6 @@ class IDF_Diff
return str_repeat('&nbsp;', $i).substr($line, $i); return str_repeat('&nbsp;', $i).substr($line, $i);
} }
/**
* @return array array(array(start, n), array(start, n))
*/
public static function getChunk($line)
{
$elts = explode(' ', $line);
$res = array();
for ($i=1;$i<3;$i++) {
$res[] = explode(',', trim(substr($elts[$i], 1)));
}
return $res;
}
/** /**
* Review patch. * Review patch.
* *
@ -347,7 +303,6 @@ class IDF_Diff
return $nnew_chunks; return $nnew_chunks;
} }
public function renderCompared($chunks, $filename) public function renderCompared($chunks, $filename)
{ {
$fileinfo = IDF_FileUtil::getMimeType($filename); $fileinfo = IDF_FileUtil::getMimeType($filename);
@ -381,6 +336,5 @@ class IDF_Diff
$i++; $i++;
} }
return Pluf_Template::markSafe($out); return Pluf_Template::markSafe($out);
} }
} }

View File

@ -473,5 +473,14 @@ class IDF_Scm
{ {
return str_replace('%2F', '/', rawurlencode($path)); return str_replace('%2F', '/', rawurlencode($path));
} }
/**
* Returns the number of slashes and preceeding path components
* that should be stripped from paths in the SCM's diff output
*/
public function getDiffPathStripLevel()
{
return 0;
}
} }

View File

@ -649,6 +649,14 @@ class IDF_Scm_Git extends IDF_Scm
return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip'); return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip');
} }
/**
* @see IDF_Scm::getDiffPathStripLevel()
*/
public function getDiffPathStripLevel()
{
return 1;
}
/* /*
* ===================================================== * =====================================================
* Specific Git Commands * Specific Git Commands

View File

@ -464,4 +464,12 @@ class IDF_Scm_Mercurial extends IDF_Scm
escapeshellarg($commit)); escapeshellarg($commit));
return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip'); return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip');
} }
/**
* @see IDF_Scm::getDiffPathStripLevel()
*/
public function getDiffPathStripLevel()
{
return 1;
}
} }

View File

@ -32,29 +32,6 @@ class IDF_Tests_TestDiff extends UnitTestCase
parent::__construct('Test the diff parser.'); parent::__construct('Test the diff parser.');
} }
public function testGetFile()
{
$lines = array(
'diff --git a/src/IDF/Form/Register.php b/src/IDF/Form/Register.php',
'diff --git a/src/IDF/Form/RegisterConfirmation.php b/src/IDF/Form/RegisterConfirmation.php',
'diff --git a/src/IDF/Form/RegisterInputKey.php b/src/IDF/Form/RegisterInputKey.php',
'diff --git a/src/IDF/Views.php b/src/IDF/Views.php',
'diff --git a/src/IDF/conf/views.php b/src/IDF/conf/views.php',
);
$files = array(
'src/IDF/Form/Register.php',
'src/IDF/Form/RegisterConfirmation.php',
'src/IDF/Form/RegisterInputKey.php',
'src/IDF/Views.php',
'src/IDF/conf/views.php',
);
$i = 0;
foreach ($lines as $line) {
$this->assertEqual($files[$i], IDF_Diff::getFile($line));
$i++;
}
}
public function testBinaryDiff() public function testBinaryDiff()
{ {
$diff_content = file_get_contents(dirname(__FILE__).'/test-diff.diff'); $diff_content = file_get_contents(dirname(__FILE__).'/test-diff.diff');
@ -90,4 +67,4 @@ class IDF_Tests_TestDiff extends UnitTestCase
$diff->files['src/IDF/Scm/Git.php']['chunks'][1][2]); $diff->files['src/IDF/Scm/Git.php']['chunks'][1][2]);
$this->assertEqual(7, count($diff->files['src/IDF/Scm/Git.php']['chunks'][1])); $this->assertEqual(7, count($diff->files['src/IDF/Scm/Git.php']['chunks'][1]));
} }
} }

View File

@ -303,7 +303,8 @@ class IDF_Views_Source
$title = sprintf(__('%s Commit Details'), (string) $request->project); $title = sprintf(__('%s Commit Details'), (string) $request->project);
$page_title = sprintf(__('%s Commit Details - %s'), (string) $request->project, $commit); $page_title = sprintf(__('%s Commit Details - %s'), (string) $request->project, $commit);
$rcommit = IDF_Commit::getOrAdd($cobject, $request->project); $rcommit = IDF_Commit::getOrAdd($cobject, $request->project);
$diff = new IDF_Diff($cobject->diff); $diff = new IDF_Diff($cobject->diff, $scm->getDiffPathStripLevel());
$cobject->diff = null;
$diff->parse(); $diff->parse();
$scmConf = $request->conf->getVal('scm', 'git'); $scmConf = $request->conf->getVal('scm', 'git');
try { try {