From baa88412b9c7cb4f9d34c4e5430e57006de9000e Mon Sep 17 00:00:00 2001 From: Patrick Georgi Date: Wed, 16 Mar 2011 23:50:41 +0100 Subject: [PATCH] Rewrite the diff parser and reduce the memory footprint. The diff parser code was rewritten for clarity and speed and now handles a couple of ugly cornercases, like SVN's property change output and single change chunks, much better. Since the path parsing was unified as well, the SCM interface gained a new method `getPathStripLevel()` which determines how many path components need to be shoven off a file name for the SCM to form a valid path in the workspace (similar to patch(1)'s --strip option). Fixes issue 627. Automated tests follow. --- NEWS.mdtext | 1 + src/IDF/Diff.php | 198 ++++++++++++++----------------------- src/IDF/Scm.php | 9 ++ src/IDF/Scm/Git.php | 8 ++ src/IDF/Scm/Mercurial.php | 8 ++ src/IDF/Tests/TestDiff.php | 25 +---- src/IDF/Views/Source.php | 3 +- 7 files changed, 105 insertions(+), 147 deletions(-) diff --git a/NEWS.mdtext b/NEWS.mdtext index 09e1fe9..4c75e0c 100644 --- a/NEWS.mdtext +++ b/NEWS.mdtext @@ -52,6 +52,7 @@ - Disable browser autocomplete of password fields in the account settings (issue 616) - Improvements in the automatic linker parser (issue 618) - The `createIssue` API method did not check the API authentication (issue 619) +- Reduce the memory footprint and compatibility of the internal diff parser (issue 627) - Print active git branch heads and tags in bold ## Documentation diff --git a/src/IDF/Diff.php b/src/IDF/Diff.php index a9be715..635afe5 100644 --- a/src/IDF/Diff.php +++ b/src/IDF/Diff.php @@ -27,16 +27,14 @@ */ class IDF_Diff { - public $repo = ''; - public $diff = ''; + public $path_strip_level = 0; protected $lines = array(); public $files = array(); - public function __construct($diff, $repo='') + public function __construct($diff, $path_strip_level = 0) { - $this->repo = $repo; - $this->diff = $diff; + $this->path_strip_level = $path_strip_level; $this->lines = preg_split("/\015\012|\015|\012/", $diff); } @@ -49,118 +47,90 @@ class IDF_Diff $files = array(); $indiff = false; // Used to skip the headers in the git patches $i = 0; // Used to skip the end of a git patch with --\nversion number - foreach ($this->lines as $line) { - $i++; - if (0 === strpos($line, '--') and isset($this->lines[$i]) - and preg_match('/^\d+\.\d+\.\d+\.\d+$/', $this->lines[$i])) { - break; + $diffsize = count($this->lines); + while ($i < $diffsize) { + // look for the potential beginning of a diff + if (substr($this->lines[$i], 0, 4) !== '--- ') { + $i++; + continue; } - if (0 === strpos($line, 'diff --git a')) { - $current_file = self::getFile($line); - $files[$current_file] = array(); - $files[$current_file]['chunks'] = array(); - $files[$current_file]['chunks_def'] = array(); - $current_chunk = 0; - $indiff = true; + + // we're inside a diff candiate + $oldfileline = $this->lines[$i++]; + $newfileline = $this->lines[$i++]; + if (substr($newfileline, 0, 4) !== '+++ ') { + // not a valid diff here, move on continue; - } else if (preg_match('#^diff -r [^\s]+ -r [^\s]+ (.+)$#', $line, $matches)) { - $current_file = $matches[1]; - $files[$current_file] = array(); - $files[$current_file]['chunks'] = array(); - $files[$current_file]['chunks_def'] = array(); - $current_chunk = 0; - $indiff = true; - continue; - } else if (!$indiff && 0 === strpos($line, '=========')) { - // ignore pseudo stanzas with a hint of a binary file - if (preg_match("/^# (.+) is binary/", $this->lines[$i])) - continue; - // by default always use the new name of a possibly renamed file - $current_file = self::getMtnFile($this->lines[$i+1]); - // mtn 0.48 and newer set /dev/null as file path for dropped files - // so we display the old name here - if ($current_file == "/dev/null") { - $current_file = self::getMtnFile($this->lines[$i]); + } + + // use new file name by default + preg_match("/^\+\+\+ ([^\t]+)/", $newfileline, $m); + $current_file = $m[1]; + if ($current_file === '/dev/null') { + // except if it's /dev/null, use the old one instead + // eg. mtn 0.48 and newer + preg_match("/^--- ([^\t]+)/", $oldfileline, $m); + $current_file = $m[1]; + } + if ($this->path_strip_level > 0) { + $current_file = array_pop(explode('/', $current_file, $this->path_strip_level+1)); + } + $current_chunk = 0; + $files[$current_file] = array(); + $files[$current_file]['chunks'] = array(); + $files[$current_file]['chunks_def'] = array(); + + while ($i < $diffsize && substr($this->lines[$i], 0, 3) === '@@ ') { + $elems = preg_match('/@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@.*/', + $this->lines[$i++], $results); + if ($elems != 1) { + // hunk is badly formatted + break; } - if ($current_file == "/dev/null") { - throw new Exception( - "could not determine path from diff" - ); - } - $files[$current_file] = array(); - $files[$current_file]['chunks'] = array(); - $files[$current_file]['chunks_def'] = array(); - $current_chunk = 0; - $indiff = true; - continue; - } else if (0 === strpos($line, 'Index: ')) { - $current_file = self::getSvnFile($line); - $files[$current_file] = array(); - $files[$current_file]['chunks'] = array(); - $files[$current_file]['chunks_def'] = array(); - $current_chunk = 0; - $indiff = true; - continue; - } - if (!$indiff) { - continue; - } - if (0 === strpos($line, '@@ ')) { - $files[$current_file]['chunks_def'][] = self::getChunk($line); + $delstart = $results[1]; + $dellines = $results[2] === '' ? 1 : $results[2]; + $addstart = $results[3]; + $addlines = $results[4] === '' ? 1 : $results[4]; + $chunks_def = array(array($delstart), array($addstart)); + if ($results[2] != '') $chunks_def[0][] = $dellines; + if ($results[4] != '') $chunks_def[1][] = $addlines; + $files[$current_file]['chunks_def'][] = $chunks_def; $files[$current_file]['chunks'][] = array(); + + while ($addlines >= 0 || $dellines >= 0) { + $linetype = $this->lines[$i] != '' ? $this->lines[$i][0] : ' '; + switch ($linetype) { + case ' ': + $files[$current_file]['chunks'][$current_chunk][] = + array($delstart, $addstart, substr($this->lines[$i++], 1)); + $dellines--; + $addlines--; + $delstart++; + $addstart++; + break; + case '+': + $files[$current_file]['chunks'][$current_chunk][] = + array('', $addstart, substr($this->lines[$i++], 1)); + $addlines--; + $addstart++; + break; + case '-': + $files[$current_file]['chunks'][$current_chunk][] = + array($delstart, '', substr($this->lines[$i++], 1)); + $dellines--; + $delstart++; + break; + default: + break 2; + } + } $current_chunk++; - $lline = $files[$current_file]['chunks_def'][$current_chunk-1][0][0]; - $rline = $files[$current_file]['chunks_def'][$current_chunk-1][1][0]; - continue; - } - if (0 === strpos($line, '---') or 0 === strpos($line, '+++')) { - continue; - } - if (0 === strpos($line, '-')) { - $files[$current_file]['chunks'][$current_chunk-1][] = array($lline, '', substr($line, 1)); - $lline++; - continue; - } - if (0 === strpos($line, '+')) { - $files[$current_file]['chunks'][$current_chunk-1][] = array('', $rline, substr($line, 1)); - $rline++; - continue; - } - if (0 === strpos($line, ' ')) { - $files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, substr($line, 1)); - $rline++; - $lline++; - continue; - } - if ($line == '') { - $files[$current_file]['chunks'][$current_chunk-1][] = array($lline, $rline, $line); - $rline++; - $lline++; - continue; } } $this->files = $files; return $files; } - public static function getFile($line) - { - $line = substr(trim($line), 10); - $n = (int) strlen($line)/2; - return trim(substr($line, 3, $n-3)); - } - - public static function getSvnFile($line) - { - return substr(trim($line), 7); - } - - public static function getMtnFile($line) - { - preg_match("/^[+-]{3} ([^\t]+)/", $line, $m); - return $m[1]; - } - /** * Return the html version of a parsed diff. */ @@ -197,7 +167,6 @@ class IDF_Diff return Pluf_Template::markSafe($out); } - public static function padLine($line) { $line = str_replace("\t", ' ', $line); @@ -210,19 +179,6 @@ class IDF_Diff return str_repeat(' ', $i).substr($line, $i); } - /** - * @return array array(array(start, n), array(start, n)) - */ - public static function getChunk($line) - { - $elts = explode(' ', $line); - $res = array(); - for ($i=1;$i<3;$i++) { - $res[] = explode(',', trim(substr($elts[$i], 1))); - } - return $res; - } - /** * Review patch. * @@ -347,7 +303,6 @@ class IDF_Diff return $nnew_chunks; } - public function renderCompared($chunks, $filename) { $fileinfo = IDF_FileUtil::getMimeType($filename); @@ -381,6 +336,5 @@ class IDF_Diff $i++; } return Pluf_Template::markSafe($out); - } } diff --git a/src/IDF/Scm.php b/src/IDF/Scm.php index d9c4435..d299458 100644 --- a/src/IDF/Scm.php +++ b/src/IDF/Scm.php @@ -473,5 +473,14 @@ class IDF_Scm { return str_replace('%2F', '/', rawurlencode($path)); } + + /** + * Returns the number of slashes and preceeding path components + * that should be stripped from paths in the SCM's diff output + */ + public function getDiffPathStripLevel() + { + return 0; + } } diff --git a/src/IDF/Scm/Git.php b/src/IDF/Scm/Git.php index 9339add..a836aea 100644 --- a/src/IDF/Scm/Git.php +++ b/src/IDF/Scm/Git.php @@ -649,6 +649,14 @@ class IDF_Scm_Git extends IDF_Scm return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip'); } + /** + * @see IDF_Scm::getDiffPathStripLevel() + */ + public function getDiffPathStripLevel() + { + return 1; + } + /* * ===================================================== * Specific Git Commands diff --git a/src/IDF/Scm/Mercurial.php b/src/IDF/Scm/Mercurial.php index a413cb1..776e7fc 100644 --- a/src/IDF/Scm/Mercurial.php +++ b/src/IDF/Scm/Mercurial.php @@ -464,4 +464,12 @@ class IDF_Scm_Mercurial extends IDF_Scm escapeshellarg($commit)); return new Pluf_HTTP_Response_CommandPassThru($cmd, 'application/x-zip'); } + + /** + * @see IDF_Scm::getDiffPathStripLevel() + */ + public function getDiffPathStripLevel() + { + return 1; + } } diff --git a/src/IDF/Tests/TestDiff.php b/src/IDF/Tests/TestDiff.php index 00f579f..d28ce22 100644 --- a/src/IDF/Tests/TestDiff.php +++ b/src/IDF/Tests/TestDiff.php @@ -32,29 +32,6 @@ class IDF_Tests_TestDiff extends UnitTestCase parent::__construct('Test the diff parser.'); } - public function testGetFile() - { - $lines = array( - 'diff --git a/src/IDF/Form/Register.php b/src/IDF/Form/Register.php', - 'diff --git a/src/IDF/Form/RegisterConfirmation.php b/src/IDF/Form/RegisterConfirmation.php', - 'diff --git a/src/IDF/Form/RegisterInputKey.php b/src/IDF/Form/RegisterInputKey.php', - 'diff --git a/src/IDF/Views.php b/src/IDF/Views.php', - 'diff --git a/src/IDF/conf/views.php b/src/IDF/conf/views.php', - ); - $files = array( - 'src/IDF/Form/Register.php', - 'src/IDF/Form/RegisterConfirmation.php', - 'src/IDF/Form/RegisterInputKey.php', - 'src/IDF/Views.php', - 'src/IDF/conf/views.php', - ); - $i = 0; - foreach ($lines as $line) { - $this->assertEqual($files[$i], IDF_Diff::getFile($line)); - $i++; - } - } - public function testBinaryDiff() { $diff_content = file_get_contents(dirname(__FILE__).'/test-diff.diff'); @@ -90,4 +67,4 @@ class IDF_Tests_TestDiff extends UnitTestCase $diff->files['src/IDF/Scm/Git.php']['chunks'][1][2]); $this->assertEqual(7, count($diff->files['src/IDF/Scm/Git.php']['chunks'][1])); } -} \ No newline at end of file +} diff --git a/src/IDF/Views/Source.php b/src/IDF/Views/Source.php index f92a2d5..a1c0f6c 100644 --- a/src/IDF/Views/Source.php +++ b/src/IDF/Views/Source.php @@ -303,7 +303,8 @@ class IDF_Views_Source $title = sprintf(__('%s Commit Details'), (string) $request->project); $page_title = sprintf(__('%s Commit Details - %s'), (string) $request->project, $commit); $rcommit = IDF_Commit::getOrAdd($cobject, $request->project); - $diff = new IDF_Diff($cobject->diff); + $diff = new IDF_Diff($cobject->diff, $scm->getDiffPathStripLevel()); + $cobject->diff = null; $diff->parse(); $scmConf = $request->conf->getVal('scm', 'git'); try {