Rewrite the diff parser and reduce the memory footprint.

The diff parser code was rewritten for clarity and speed and now handles
a couple of ugly cornercases, like SVN's property change output and single
change chunks, much better. Since the path parsing was unified as well,
the SCM interface gained a new method `getPathStripLevel()` which determines
how many path components need to be shoven off a file name for the SCM
to form a valid path in the workspace (similar to patch(1)'s --strip option).
Fixes issue 627. Automated tests follow.
This commit is contained in:
Patrick Georgi
2011-03-16 23:50:41 +01:00
committed by Thomas Keller
parent 6fb9b72e22
commit baa88412b9
7 changed files with 105 additions and 147 deletions

View File

@@ -27,16 +27,14 @@
*/
class IDF_Diff
{
public $repo = '';
public $diff = '';
public $path_strip_level = 0;
protected $lines = array();
public $files = array();
public function __construct($diff, $repo='')
public function __construct($diff, $path_strip_level = 0)
{
$this->repo = $repo;
$this->diff = $diff;
$this->path_strip_level = $path_strip_level;
$this->lines = preg_split("/\015\012|\015|\012/", $diff);
}
@@ -49,118 +47,90 @@ class IDF_Diff
$files = array();
$indiff = false; // Used to skip the headers in the git patches
$i = 0; // Used to skip the end of a git patch with --\nversion number
foreach ($this->lines as $line) {
$i++;
if (0 === strpos($line, '--') and isset($this->lines[$i])
and preg_match('/^\d+\.\d+\.\d+\.\d+$/', $this->lines[$i])) {
break;
$diffsize = count($this->lines);
while ($i < $diffsize) {
// look for the potential beginning of a diff
if (substr($this->lines[$i], 0, 4) !== '--- ') {
$i++;
continue;
}
if (0 === strpos($line, 'diff --git a')) {
$current_file = self::getFile($line);
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
// we're inside a diff candiate
$oldfileline = $this->lines[$i++];
$newfileline = $this->lines[$i++];
if (substr($newfileline, 0, 4) !== '+++ ') {
// not a valid diff here, move on
continue;
} else if (preg_match('#^diff -r [^\s]+ -r [^\s]+ (.+)$#', $line, $matches)) {
$current_file = $matches[1];
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
} else if (!$indiff && 0 === strpos($line, '=========')) {
// ignore pseudo stanzas with a hint of a binary file
if (preg_match("/^# (.+) is binary/", $this->lines[$i]))
continue;
// by default always use the new name of a possibly renamed file
$current_file = self::getMtnFile($this->lines[$i+1]);
// mtn 0.48 and newer set /dev/null as file path for dropped files
// so we display the old name here
if ($current_file == "/dev/null") {
$current_file = self::getMtnFile($this->lines[$i]);
}
// use new file name by default
preg_match("/^\+\+\+ ([^\t]+)/", $newfileline, $m);
$current_file = $m[1];
if ($current_file === '/dev/null') {
// except if it's /dev/null, use the old one instead
// eg. mtn 0.48 and newer
preg_match("/^--- ([^\t]+)/", $oldfileline, $m);
$current_file = $m[1];
}
if ($this->path_strip_level > 0) {
$current_file = array_pop(explode('/', $current_file, $this->path_strip_level+1));
}
$current_chunk = 0;
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
while ($i < $diffsize && substr($this->lines[$i], 0, 3) === '@@ ') {
$elems = preg_match('/@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@.*/',
$this->lines[$i++], $results);
if ($elems != 1) {
// hunk is badly formatted
break;
}
if ($current_file == "/dev/null") {
throw new Exception(
"could not determine path from diff"
);
}
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
} else if (0 === strpos($line, 'Index: ')) {
$current_file = self::getSvnFile($line);
$files[$current_file] = array();
$files[$current_file]['chunks'] = array();
$files[$current_file]['chunks_def'] = array();
$current_chunk = 0;
$indiff = true;
continue;
}
if (!$indiff) {
continue;
}
if (0 === strpos($line, '@@ ')) {
$files[$current_file]['chunks_def'][] = self::getChunk($line);
$delstart = $results[1];
$dellines = $results[2] === '' ? 1 : $results[2];
$addstart = $results[3];
$addlines = $results[4] === '' ? 1 : $results[4];
$chunks_def = array(array($delstart), array($addstart));
if ($results[2] != '') $chunks_def[0][] = $dellines;
if ($results[4] != '') $chunks_def[1][] = $addlines;
$files[$current_file]['chunks_def'][] = $chunks_def;
$files[$current_file]['chunks'][] = array();
while ($addlines >= 0 || $dellines >= 0) {
$linetype = $this->lines[$i] != '' ? $this->lines[$i][0] : ' ';
switch ($linetype) {
case ' ':
$files[$current_file]['chunks'][$current_chunk][] =
array($delstart, $addstart, substr($this->lines[$i++], 1));
$dellines--;
$addlines--;
$delstart++;
$addstart++;
break;
case '+':
$files[$current_file]['chunks'][$current_chunk][] =
array('', $addstart, substr($this->lines[$i++], 1));
$addlines--;
$addstart++;
break;
case '-':
$files[$current_file]['chunks'][$current_chunk][] =
array($delstart, '', substr($this->lines[$i++], 1));
$dellines--;
$delstart++;
break;
default:
break 2;
}
}
$current_chunk++;
$lline = $files[$current_file]['chunks_def'][$current_chunk-1][0][0];
$rline = $files[$current_file]['chunks_def'][$current_chunk-1][1][0];
continue;
}
if (0 === strpos($line, '---') or 0 === strpos($line, '+++')) {
continue;
}
if (0 === strpos($line, '-')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, '', substr($line, 1));
$lline++;
continue;
}
if (0 === strpos($line, '+')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array('', $rline, substr($line, 1));
$rline++;
continue;
}
if (0 === strpos($line, ' ')) {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, substr($line, 1));
$rline++;
$lline++;
continue;
}
if ($line == '') {
$files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, $line);
$rline++;
$lline++;
continue;
}
}
$this->files = $files;
return $files;
}
public static function getFile($line)
{
$line = substr(trim($line), 10);
$n = (int) strlen($line)/2;
return trim(substr($line, 3, $n-3));
}
public static function getSvnFile($line)
{
return substr(trim($line), 7);
}
public static function getMtnFile($line)
{
preg_match("/^[+-]{3} ([^\t]+)/", $line, $m);
return $m[1];
}
/**
* Return the html version of a parsed diff.
*/
@@ -197,7 +167,6 @@ class IDF_Diff
return Pluf_Template::markSafe($out);
}
public static function padLine($line)
{
$line = str_replace("\t", ' ', $line);
@@ -210,19 +179,6 @@ class IDF_Diff
return str_repeat('&nbsp;', $i).substr($line, $i);
}
/**
* @return array array(array(start, n), array(start, n))
*/
public static function getChunk($line)
{
$elts = explode(' ', $line);
$res = array();
for ($i=1;$i<3;$i++) {
$res[] = explode(',', trim(substr($elts[$i], 1)));
}
return $res;
}
/**
* Review patch.
*
@@ -347,7 +303,6 @@ class IDF_Diff
return $nnew_chunks;
}
public function renderCompared($chunks, $filename)
{
$fileinfo = IDF_FileUtil::getMimeType($filename);
@@ -381,6 +336,5 @@ class IDF_Diff
$i++;
}
return Pluf_Template::markSafe($out);
}
}