Added a first version of the new SCM backend.

dev
Loic d'Anterroches 2009-04-21 14:13:44 +02:00
parent 903c457439
commit 86da0c0eed
5 changed files with 522 additions and 103 deletions

View File

@ -4,8 +4,6 @@ The SyncSvn plugin allow the direct creation and synchronisation of
subversion repositories with the InDefero database. This requires
giving access to the repositories using the DAV_SVN module of Apache2.
You need at least Subversion 1.4.6.
## To Contact the Author
Baptiste Michaud

View File

@ -22,10 +22,47 @@
# ***** END LICENSE BLOCK ***** */
/**
* Manage differents SCM systems
* Manage different SCM systems.
*
* This is the base class with the different required methods to be
* implemented by the SCMs. Each SCM backend needs to extend this
* class. We are not using an interface because this is not really
* needed.
*
* The philosophy behind the interface is not to provide a wrapper
* around the different SCMs but to provide methods to retrieve, in
* the most efficient way, the information to be displayed/needed in
* the web interface. This means that each SCM can use the best
* options, including caching, to retrieve the information.
*
* Note on caching: You must not cache ephemeral information like the
* changelog, but you can cache the commit info (except with
* subversion where you can change commit info...).
*
* All the output of the methods must be serializable. This means that
* if you are parsing XML you need to correctly cast the results as
* string when needed.
*/
class IDF_Scm
{
/**
* String template for consistent error messages.
*/
public $error_tpl = 'Error command "%s" returns code %d and output: %s';
/**
* Path to the repository.
*/
public $repo = '';
/**
* Cache storage.
*
* It must only be used to store data for the lifetime of the
* object. For example if you need to get the list of branches in
* several functions, better to try to get from the cache first.
*/
protected $cache = array();
/**
* Returns an instance of the correct scm backend object.
@ -42,6 +79,134 @@ class IDF_Scm
return call_user_func(array($scms[$scm], 'factory'), $project);
}
/**
* Check if the backend is available for display.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @return bool Available
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function isAvailable()
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Returns the list of branches.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @return array For example array('trunk', '1.0branch')
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getBranches()
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Returns the list of tags.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @return array For example array('v0.9', 'v1.0')
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getTags()
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Returns the main branch.
*
* The main branch is the one displayed by default. For example
* master, trunk or tip.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @return string
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getMainBranch()
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Returns the list of files in a given folder.
*
* The list is an array of standard class objects with attributes
* for each file/directory/external element.
*
* This is the most important method of the SCM backend as this is
* the one conveying the speed feeling of the application. All the
* dirty optimization tricks are allowed there.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @param string Revision or commit
* @param string Folder ('/')
* @param string Branch (null)
* @return array
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getTree($rev, $folder='/', $branch=null)
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Get commit details.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @param string Commit or revision number
* @param bool Get commit diff (false)
* @return stdClass
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getCommit($commit, $getdiff=false)
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Get latest changes.
*
* It defaults to the main branch. If possible you should code in a
* way to avoid repetitive calls to getCommit. Try to be
* efficient.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @param string Branch (null)
* @param int Number of changes (10)
* @return array List of commits
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getChangeLog($branch=null, $n=10)
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Given the string describing the author from the log find the
* author in the database.
*
* If the input is an array, it will return an array of results.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @param mixed string/array Author
* @return mixed Pluf_User or null or array
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function findAuthor($author)
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Given a revision and a file path, retrieve the file content.
*
* The third parameter is to only request the command that is used
* to get the file content. This is used when downloading a file
* at a given revision as it can be passed to a
* Pluf_HTTP_Response_CommandPassThru response. This allows
* streaming a large response without buffering it in memory.
*
* The file definition can be a hash or a path depending on the
* SCM.
*
* Base-class placeholder: it always throws, each SCM backend is
* expected to provide its own implementation.
*
* @param string File definition
* @param string Revision ('')
* @param bool Returns command only (false)
* @return string File content
* @throws Pluf_Exception_NotImplemented Always, in this base class.
*/
public function getFile($def, $rev='', $cmd_only=false)
{
throw new Pluf_Exception_NotImplemented();
}
/**
* Equivalent to exec but with caching.
*

View File

@ -25,16 +25,120 @@
* Git utils.
*
*/
class IDF_Scm_Git
class IDF_Scm_Git extends IDF_Scm
{
public $repo = '';
public $mediumtree_fmt = 'commit %H%nAuthor: %an <%ae>%nTree: %T%nDate: %ai%n%n%s%n%n%b';
/**
* Build a git backend bound to one repository.
*
* @param string Value stored in $this->repo; the methods of this
*               class pass it to git through the GIT_DIR variable.
*/
public function __construct($repo)
{
$this->repo = $repo;
}
/* ============================================== *
* *
* Common Methods Implemented By All The SCMs *
* *
* ============================================== */
/**
 * Tell whether the repository can be displayed.
 *
 * The repository is considered available when the branch list can
 * be retrieved without an IDF_Scm_Exception being raised.
 *
 * @return bool Available
 */
public function isAvailable()
{
    $available = true;
    try {
        // Only the success of the call matters, not its result.
        $this->getBranches();
    } catch (IDF_Scm_Exception $e) {
        $available = false;
    }
    return $available;
}
/**
 * Returns the list of branches of the repository.
 *
 * The list is computed once with "git branch" and then kept in the
 * per-object cache under the 'branches' key.
 *
 * @return array Branch names
 * @throws IDF_Scm_Exception When the git command exits with a non zero code.
 */
public function getBranches()
{
    if (!isset($this->cache['branches'])) {
        $cmd = Pluf::f('idf_exec_cmd_prefix', '')
            .sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' branch',
                     escapeshellarg($this->repo));
        exec($cmd, $lines, $status);
        if ($status != 0) {
            $msg = sprintf($this->error_tpl, $cmd, $status,
                           implode("\n", $lines));
            throw new IDF_Scm_Exception($msg);
        }
        $names = array();
        foreach ($lines as $line) {
            // Drop the two leading characters of each output line
            // (the "* " or "  " marker printed by git branch).
            $names[] = substr($line, 2);
        }
        $this->cache['branches'] = $names;
    }
    return $this->cache['branches'];
}
/**
* Returns the main branch.
*
* For this git backend the value is the fixed string 'master'.
*
* @return string
*/
public function getMainBranch()
{
return 'master';
}
/**
 * Returns the list of files in a given folder at a given commit.
 *
 * Git "tree" is not the same as the tree we get here.
 *
 * With git each commit object stores a related tree object. This
 * tree is basically providing what is in the given folder at the
 * given commit. It looks something like this:
 *
 * <pre>
 * 100644 blob bcd155e609c51b4651aab9838b270cce964670af AUTHORS
 * 100644 blob 87b44c5c7df3cc90c031317c1ac8efcfd8a13631 COPYING
 * 100644 blob 2a0f899cbfe33ea755c343b06a13d7de6c22799f INSTALL.mdtext
 * 040000 tree 2f469c4c5318aa4ad48756874373370f6112f77b doc
 * 040000 tree 911e0bd2706f0069b04744d6ef41353faf06a0a7 logo
 * </pre>
 *
 * You can then follow what is in the given folder (let say doc)
 * by using the hash.
 *
 * This means that you must not confuse the git tree and the output
 * tree in the following method.
 *
 * @see http://www.kernel.org/pub/software/scm/git/docs/git-ls-tree.html
 *
 * @param string Commit
 * @param string Folder ('/'), '/' or an empty value means the root
 * @param string Branch (null), not used by this implementation
 * @return array List of objects as returned by getTreeDetails()
 * @throws Exception When the folder is not a tree of the commit.
 */
public function getTree($commit, $folder='/', $branch=null)
{
    // The root is represented by an empty string. The comparison is
    // done on strings so that a folder named "0" is not mistaken for
    // the root as it would be with a truthiness test.
    $folder = (string) $folder;
    if ('/' === $folder) {
        $folder = '';
    }
    $in_folder = ('' !== $folder);
    // Now we grab the info about this commit including its tree.
    $co = $this->getCommit($commit);
    if ($in_folder) {
        // As we are limiting to a given folder, we need to find
        // the tree corresponding to this folder.
        $tinfo = $this->getTreeInfo($commit, $folder);
        if (isset($tinfo[0]) and $tinfo[0]->type == 'tree') {
            $tree = $tinfo[0]->hash;
        } else {
            throw new Exception(sprintf(__('Folder %1$s not found in commit %2$s.'), $folder, $commit));
        }
    } else {
        $tree = $co->tree;
    }
    $res = array();
    foreach ($this->getTreeInfo($tree) as $file) {
        // Default values for the blobs, they are replaced later by
        // getTreeDetails() when the real information is found.
        if ($file->type == 'blob') {
            $file->date = $co->date;
            $file->log = '----';
            $file->author = 'Unknown';
        }
        $file->fullpath = ($in_folder) ? $folder.'/'.$file->file : $file->file;
        if ($file->type == 'commit') {
            // We have a submodule
            $file = $this->getSubmodule($file, $commit);
        }
        $res[] = $file;
    }
    // Grab the details for each blob and return the list.
    return $this->getTreeDetails($res);
}
/**
* Given the string describing the author from the log find the
* author in the database.
@ -120,58 +224,6 @@ class IDF_Scm_Git
*/
public function filesAtCommit($commit='HEAD', $folder='')
{
// Build the list of entries of $folder (or of the root tree when
// $folder is empty) at $commit, decorating each blob with the date,
// title and author of a commit found in the raw log.
// now we grab the info about this commit including its tree.
$co = $this->getCommit($commit);
if ($folder) {
// As we are limiting to a given folder, we need to find
// the tree corresponding to this folder.
// NOTE(review): this call uses the ($tree, $recurse, $folder)
// argument order; confirm it matches the current signature of
// getTreeInfo.
$found = false;
foreach ($this->getTreeInfo($co->tree, true, $folder) as $file) {
if ($file->type == 'tree' and $file->file == $folder) {
$found = true;
$tree = $file->hash;
break;
}
}
if (!$found) {
throw new Exception(sprintf(__('Folder %1$s not found in commit %2$s.'), $folder, $commit));
}
} else {
$tree = $co->tree;
}
$res = array();
// get the raw log corresponding to this commit to find the
// origin of each file.
// Only the 5000 most recent log entries reachable from $commit are
// requested (-5000).
$rawlog = array();
$cmd = sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' log --raw --abbrev=40 --pretty=oneline -5000 %s',
escapeshellarg($this->repo), escapeshellarg($commit));
IDF_Scm::exec($cmd, $rawlog);
// We reverse the log to be able to use a fixed efficient
// regex without back tracking.
$rawlog = implode("\n", array_reverse($rawlog));
// NOTE(review): second argument here is the former $recurse flag;
// confirm against the current getTreeInfo signature.
foreach ($this->getTreeInfo($tree, false) as $file) {
// Now we grab the files in the current tree with as much
// information as possible.
$matches = array();
// The regex looks for a raw line whose second hash is the blob
// hash, then captures the first following line starting with 40
// hexadecimal characters, which in the reversed log is the header
// of the commit owning that raw line.
if ($file->type == 'blob' and preg_match('/^\:\d{6} \d{6} [0-9a-f]{40} '.$file->hash.' .*^([0-9a-f]{40})/msU',
$rawlog, $matches)) {
$fc = $this->getCommit($matches[1]);
$file->date = $fc->date;
$file->log = $fc->title;
$file->author = $fc->author;
} else if ($file->type == 'blob') {
// Blob not found in the scanned log: fall back to placeholders.
$file->date = $co->date;
$file->log = '----';
$file->author = 'Unknown';
}
$file->fullpath = ($folder) ? $folder.'/'.$file->file : $file->file;
if ($file->type == 'commit') {
// We have a submodule
$file = $this->getSubmodule($file, $commit);
}
$res[] = $file;
}
return $res;
}
/**
@ -179,21 +231,21 @@ class IDF_Scm_Git
*
* @param string Tree hash
* @param bool Do we recurse in subtrees (true)
* @param string Folder in which we want to get the info ('')
* @return array Array of file information.
*/
public function getTreeInfo($tree, $recurse=true, $folder='')
public function getTreeInfo($tree, $folder='')
{
if ('tree' != $this->testHash($tree)) {
if (!in_array($this->testHash($tree), array('tree', 'commit'))) {
throw new Exception(sprintf(__('Not a valid tree: %s.'), $tree));
}
$cmd_tmpl = 'GIT_DIR=%s '.Pluf::f('git_path', 'git').' ls-tree%s -t -l %s %s';
$cmd = sprintf($cmd_tmpl,
escapeshellarg($this->repo),
($recurse) ? ' -r' : '',
escapeshellarg($tree), escapeshellarg($folder));
$cmd_tmpl = 'GIT_DIR=%s '.Pluf::f('git_path', 'git').' ls-tree -l %s %s';
$cmd = Pluf::f('idf_exec_cmd_prefix', '')
.sprintf($cmd_tmpl, escapeshellarg($this->repo),
escapeshellarg($tree), escapeshellarg($folder));
$out = array();
$res = array();
IDF_Scm::exec($cmd, $out);
exec($cmd, $out);
foreach ($out as $line) {
list($perm, $type, $hash, $size, $file) = preg_split('/ |\t/', $line, 5, PREG_SPLIT_NO_EMPTY);
$res[] = (object) array('perm' => $perm, 'type' => $type,
@ -245,31 +297,15 @@ class IDF_Scm_Git
escapeshellarg($request_file_info->hash)));
}
/**
* Get the branches.
*
* @return array Branches.
*/
public function getBranches()
{
    // Ask git for the branch list of the repository.
    $lines = array();
    $cmd = sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' branch',
                   escapeshellarg($this->repo));
    IDF_Scm::exec($cmd, $lines);
    // Each output line starts with a two character marker which is
    // removed to keep only the branch name.
    $branches = array();
    foreach ($lines as $line) {
        $branches[] = substr($line, 2);
    }
    return $branches;
}
/**
* Get commit details.
*
* @param string Commit ('HEAD').
* @param bool Get commit diff (false).
* @return array Changes.
* @param string Commit
* @param bool Get commit diff (false)
* @return array Changes
*/
public function getCommit($commit='HEAD', $getdiff=false)
public function getCommit($commit, $getdiff=false)
{
if ($getdiff) {
$cmd = sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' show --date=iso --pretty=format:%s %s',
@ -283,7 +319,7 @@ class IDF_Scm_Git
escapeshellarg($commit));
}
$out = array();
IDF_Scm::exec($cmd, $out);
exec($cmd, $out);
$log = array();
$change = array();
$inchange = false;
@ -440,6 +476,7 @@ class IDF_Scm_Git
public function getSubmodule($file, $commit)
{
$file->type = 'extern';
$file->extern = '';
$info = $this->getFileInfo('.gitmodules', $commit);
if ($info == false) {
return $file;
@ -447,8 +484,204 @@ class IDF_Scm_Git
$gitmodules = $this->getBlob($info);
if (preg_match('#\[submodule\s+\"'.$file->fullpath.'\"\]\s+path\s=\s(\S+)\s+url\s=\s(\S+)#mi', $gitmodules, $matches)) {
$file->extern = $matches[2];
}
}
return $file;
}
/**
 * For each blob in the tree, find the details.
 *
 * The date, log and author attributes of the blobs are filled
 * first from the blob info cache, then, for the blobs missing in
 * the cache, from appendBlobInfoCache(). Entries for which nothing
 * is found are left untouched.
 *
 * Several entries can share the same blob hash (files with the
 * same content), all of them receive the details.
 *
 * @param array Tree information, list of objects indexed from 0
 * @return array Updated tree information
 */
public function getTreeDetails($tree)
{
    $n = count($tree);
    // blob hash => index, this is the shape given to the cache helpers.
    $details = array();
    // blob hash => all the indexes of the entries with this hash.
    $positions = array();
    for ($i=0;$i<$n;$i++) {
        if ($tree[$i]->type == 'blob') {
            $hash = $tree[$i]->hash;
            $details[$hash] = $i;
            if (!isset($positions[$hash])) {
                $positions[$hash] = array();
            }
            $positions[$hash][] = $i;
        }
    }
    if (!count($details)) {
        return $tree;
    }
    $res = $this->getCachedBlobInfo($details);
    $toapp = array();
    foreach ($details as $blob => $idx) {
        if (isset($res[$blob])) {
            foreach ($positions[$blob] as $pos) {
                $tree[$pos]->date = $res[$blob]->date;
                $tree[$pos]->log = $res[$blob]->title;
                $tree[$pos]->author = $res[$blob]->author;
            }
        } else {
            $toapp[$blob] = $idx;
        }
    }
    if (count($toapp)) {
        // Not in the cache, search the history and extend the cache.
        $res = $this->appendBlobInfoCache($toapp);
        foreach ($toapp as $blob => $idx) {
            if (isset($res[$blob])) {
                foreach ($positions[$blob] as $pos) {
                    $tree[$pos]->date = $res[$blob]->date;
                    $tree[$pos]->log = $res[$blob]->title;
                    $tree[$pos]->author = $res[$blob]->author;
                }
            }
        }
    }
    return $tree;
}
/**
* Append to the blob info cache.
*
* The append method tries to get only the necessary details, so
* instead of going through all the commits one at a time, it
* searches the raw log, 5000 commits per batch, with a regex for
* each requested blob hash.
*
* The search stops when all the blobs are found, when the log is
* exhausted or after the batch starting at --skip=20000. In this
* last case the blobs still missing are recorded with placeholder
* values. Everything collected is given to cacheBlobInfo().
*
* @see self::buildBlobInfoCache
*
* @param array The blobs for which we need the information, the
*              blob hashes are the keys (the values are not read)
* @return array The information, objects indexed by blob hash
*/
public function appendBlobInfoCache($blobs)
{
$rawlog = array();
// %%s leaves a %s placeholder in $cmd to inject the --skip value
// for each batch.
$cmd = Pluf::f('idf_exec_cmd_prefix', '')
.sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' log --raw --abbrev=40 --pretty=oneline -5000 --skip=%%s',
escapeshellarg($this->repo));
$skip = 0;
$res = array();
exec(sprintf($cmd, $skip), $rawlog);
while (count($rawlog) and count($blobs)) {
// The log is reversed so that, for a raw line, the next line
// starting with 40 hexadecimal characters is the header of the
// commit owning this raw line.
$rawlog = implode("\n", array_reverse($rawlog));
foreach ($blobs as $blob => $idx) {
if (preg_match('/^\:\d{6} \d{6} [0-9a-f]{40} '
.$blob.' .*^([0-9a-f]{40})/msU',
$rawlog, $matches)) {
$fc = $this->getCommit($matches[1]);
$res[$blob] = (object) array('hash' => $blob,
'date' => $fc->date,
'title' => $fc->title,
'author' => $fc->author);
// Found, do not search it in the next batches.
unset($blobs[$blob]);
}
}
$rawlog = array();
$skip += 5000;
if ($skip > 20000) {
// We are in the case of the import of a big old
// repository, we can store as unknown the commit info
// not to try to retrieve them each time.
foreach ($blobs as $blob => $idx) {
$res[$blob] = (object) array('hash' => $blob,
'date' => '0',
'title' => '----',
'author' => 'Unknown');
}
break;
}
exec(sprintf($cmd, $skip), $rawlog);
}
$this->cacheBlobInfo($res);
return $res;
}
/**
 * Build the blob info cache.
 *
 * The raw log is read 500 commits at a time. For each batch, the
 * blob found on each raw line is associated to the details of the
 * commit listed just before it and the batch is appended to the
 * cache with cacheBlobInfo(). The walk stops on the first empty
 * batch.
 */
public function buildBlobInfoCache()
{
    $cmd = Pluf::f('idf_exec_cmd_prefix', '')
        .sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' log --raw --abbrev=40 --pretty=oneline -500 --skip=%%s',
                 escapeshellarg($this->repo));
    for ($offset = 0; ; $offset += 500) {
        $lines = array();
        exec(sprintf($cmd, $offset), $lines);
        if (!count($lines)) {
            break;
        }
        $current = '';
        $batch = array();
        foreach ($lines as $line) {
            if (':' != substr($line, 0, 1)) {
                // Commit header line, the hash is the first 40 chars.
                $current = $this->getCommit(substr($line, 0, 40));
                continue;
            }
            // Raw line, the hash of the new blob starts at offset 56.
            $batch[] = (object) array('hash' => substr($line, 56, 40),
                                      'date' => $current->date,
                                      'title' => $current->title,
                                      'author' => $current->author);
        }
        $this->cacheBlobInfo($batch);
    }
}
/**
 * Get blob info from the cache.
 *
 * When we display the tree, we want to know when a given file was
 * created, who was the author and at which date. This is a very
 * slow operation for git as we need to go through the full
 * history, find when the blob was introduced, then grab the
 * corresponding commit. This is why we need a cache.
 *
 * The cache file is a sequence of records terminated by chr(30).
 * A record is the 40 character blob hash followed by the date,
 * the author and the title separated by chr(31). When a hash is
 * stored several times, the last record wins.
 *
 * @param array List as keys of blob hashes to get info for
 * @return array Hash indexed results, when not found not set
 */
public function getCachedBlobInfo($hashes)
{
    $res = array();
    $cache = Pluf::f('tmp_folder').'/IDF_Scm_Git-'.md5($this->repo).'.cache.db';
    if (!file_exists($cache)) {
        return $res;
    }
    $data = file_get_contents($cache);
    if (false === $data) {
        return $res;
    }
    // explode() is used and not split(): the separators are plain
    // bytes and split() is not available anymore in recent PHP.
    foreach (explode(chr(30), $data) as $rec) {
        if (strlen($rec) < 40) {
            // Empty chunk after the last separator or damaged record.
            continue;
        }
        $hash = substr($rec, 0, 40);
        if (!isset($hashes[$hash])) {
            continue;
        }
        $tmp = explode(chr(31), substr($rec, 40), 3);
        if (count($tmp) < 3) {
            // Truncated record, ignore it.
            continue;
        }
        $res[$hash] = (object) array('hash' => $hash,
                                     'date' => $tmp[0],
                                     'title' => $tmp[2],
                                     'author' => $tmp[1]);
    }
    return $res;
}
/**
 * Cache blob info.
 *
 * Given a series of blob info, append them to the cache file. A
 * record is the blob hash followed by the date, the author and the
 * title separated by chr(31), each record is terminated by
 * chr(30). The separator bytes found in the values are replaced by
 * a space to keep the file readable by getCachedBlobInfo().
 *
 * @param array Blob info, objects with the hash, date, author and
 *              title attributes
 * @return bool Success, false when the file cannot be opened,
 *              locked or fully written
 */
public function cacheBlobInfo($info)
{
    if (!count($info)) {
        // Nothing to store, do not append an empty record.
        return true;
    }
    // Prepare the data
    $seps = array(chr(30), chr(31));
    $data = '';
    foreach ($info as $file) {
        $data .= $file->hash
            .str_replace($seps, ' ', $file->date).chr(31)
            .str_replace($seps, ' ', $file->author).chr(31)
            .str_replace($seps, ' ', $file->title).chr(30);
    }
    $cache = Pluf::f('tmp_folder').'/IDF_Scm_Git-'.md5($this->repo).'.cache.db';
    $fp = fopen($cache, 'ab');
    if (!$fp) {
        return false;
    }
    $ok = false;
    if (flock($fp, LOCK_EX)) {
        // Success only if all the bytes are written.
        $ok = (strlen($data) === fwrite($fp, $data));
        fflush($fp);
        flock($fp, LOCK_UN);
    }
    fclose($fp);
    return $ok;
}
}

View File

@ -39,4 +39,13 @@ class IDF_Tests_TestGit extends UnitTestCase
$this->assertEqual('Fixed the middleware to correctly return a 404 error if the project is', $log[0]->title);
}
/**
 * Build the blob info cache of the repository containing this
 * file and check that the cache can be queried.
 */
public function testGitCache()
{
    // The repository is found relatively to this file, a path
    // valid only on one developer machine must not be used here.
    $repo = substr(dirname(__FILE__), 0, -strlen('src/IDF/Tests')).'/.git';
    $git = new IDF_Scm_Git($repo);
    $git->buildBlobInfoCache();
    // Without requested hashes nothing must be returned.
    $this->assertEqual(array(), $git->getCachedBlobInfo(array()));
}
}

View File

@ -102,32 +102,31 @@ class IDF_Views_Source
public $treeBase_precond = array('IDF_Precondition::accessSource');
public function treeBase($request, $match)
{
$title = sprintf(__('%1$s %2$s Source Tree'), (string) $request->project,
$this->getScmType($request));
$title = sprintf(__('%1$s %2$s Source Tree'),
$request->project, $this->getScmType($request));
$scm = IDF_Scm::get($request->project);
$commit = $match[2];
$branches = $scm->getBranches();
if (count($branches) == 0) {
// Redirect to the project home
if (!$scm->isAvailable()) {
$url = Pluf_HTTP_URL_urlForView('IDF_Views_Source::help',
array($request->project->shortname));
return new Pluf_HTTP_Response_Redirect($url);
}
if ('commit' != $scm->testHash($commit)) {
// Redirect to the first branch
$commit = $match[2];
$cobject = $scm->getCommit($commit);
if (!$cobject) {
$url = Pluf_HTTP_URL_urlForView('IDF_Views_Source::treeBase',
array($request->project->shortname,
$branches[0]));
$scm->getMainBranch()));
return new Pluf_HTTP_Response_Redirect($url);
}
$branches = $scm->getBranches();
$cache = Pluf_Cache::factory();
$key = sprintf('Project:%s::IDF_Views_Source::treeBase:%s::',
$request->project->id, $commit);
if (null === ($res=$cache->get($key))) {
$res = new Pluf_Template_ContextVars($scm->filesAtCommit($commit));
$res = new Pluf_Template_ContextVars($scm->getTree($commit));
$cache->set($key, $res);
}
$cobject = $scm->getCommit($commit);
$tree_in = in_array($commit, $branches);
$scmConf = $request->conf->getVal('scm', 'git');
$props = null;
@ -151,15 +150,22 @@ class IDF_Views_Source
public $tree_precond = array('IDF_Precondition::accessSource');
public function tree($request, $match)
{
$title = sprintf(__('%1$s %2$s Source Tree'), (string) $request->project,
$this->getScmType($request));
$title = sprintf(__('%1$s %2$s Source Tree'),
$request->project, $this->getScmType($request));
$scm = IDF_Scm::get($request->project);
if (!$scm->isAvailable()) {
$url = Pluf_HTTP_URL_urlForView('IDF_Views_Source::help',
array($request->project->shortname));
return new Pluf_HTTP_Response_Redirect($url);
}
$branches = $scm->getBranches();
$commit = $match[2];
$request_file = $match[3];
$fburl = Pluf_HTTP_URL_urlForView('IDF_Views_Source::treeBase',
array($request->project->shortname,
$branches[0]));
$scm->getMainBranch()));
if (substr($request_file, -1) == '/') {
$request_file = substr($request_file, 0, -1);
$url = Pluf_HTTP_URL_urlForView('IDF_Views_Source::tree',
@ -167,19 +173,23 @@ class IDF_Views_Source
$request_file));
return new Pluf_HTTP_Response_Redirect($url, 301);
}
if ('commit' != $scm->testHash($commit, $request_file)) {
// Redirect to the first branch
return new Pluf_HTTP_Response_Redirect($fburl);
}
$request_file_info = $scm->getFileInfo($request_file, $commit);
if (!$request_file_info) {
// Redirect to the first branch
return new Pluf_HTTP_Response_Redirect($fburl);
}
if ($request_file_info->type != 'tree') {
$info = self::getRequestedFileMimeType($request_file_info,
$commit, $scm);
if (!self::isText($info)) {
$rep = new Pluf_HTTP_Response($scm->getBlob($request_file_info, $commit),
$info[0]);
$rep->headers['Content-Disposition'] = 'attachment; filename="'.$info[1].'"';
@ -195,7 +205,9 @@ class IDF_Views_Source
return $this->viewFile($request, $match, $extra);
}
}
$bc = self::makeBreadCrumb($request->project, $commit, $request_file_info->file);
$page_title = $bc.' - '.$title;
$cobject = $scm->getCommit($commit);
$tree_in = in_array($commit, $branches);
@ -204,11 +216,13 @@ class IDF_Views_Source
$key = sprintf('Project:%s::IDF_Views_Source::tree:%s::%s',
$request->project->id, $commit, $request_file);
if (null === ($res=$cache->get($key))) {
$res = new Pluf_Template_ContextVars($scm->filesAtCommit($commit, $request_file));
$res = new Pluf_Template_ContextVars($scm->getTree($commit, $request_file));
$cache->set($key, $res);
}
} catch (Exception $e) {
return new Pluf_HTTP_Response_Redirect($fburl);
throw $e;
// return new Pluf_HTTP_Response_Redirect($fburl);
}
// try to find the previous level if it exists.
$prev = split('/', $request_file);